aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target
diff options
context:
space:
mode:
authorLogan Chien <loganchien@google.com>2011-12-16 09:08:45 +0800
committerLogan Chien <loganchien@google.com>2011-12-16 13:28:58 +0800
commita1e6e241a813f81be2d2f36ab60c950ca297574b (patch)
treecf2d7ec5c63f40e2b66d8be7737496719a0d7902 /lib/Target
parentac212abcc6d858470ad35ce7d660af0c1800364a (diff)
parentddecfe54a35ffbe0675f7f33e493734fd60b2495 (diff)
downloadexternal_llvm-a1e6e241a813f81be2d2f36ab60c950ca297574b.zip
external_llvm-a1e6e241a813f81be2d2f36ab60c950ca297574b.tar.gz
external_llvm-a1e6e241a813f81be2d2f36ab60c950ca297574b.tar.bz2
Merge with LLVM upstream r146714 (Dec 16th 2011)
Change-Id: Ied458adb08bf9a69250cbcee9b14b44d17e8701a
Diffstat (limited to 'lib/Target')
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp30
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp244
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h11
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp4
-rw-r--r--lib/Target/ARM/ARMCodeEmitter.cpp2
-rw-r--r--lib/Target/ARM/ARMConstantIslandPass.cpp1001
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp148
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp94
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp6
-rw-r--r--lib/Target/ARM/ARMHazardRecognizer.cpp28
-rw-r--r--lib/Target/ARM/ARMHazardRecognizer.h8
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp54
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp64
-rw-r--r--lib/Target/ARM/ARMInstrFormats.td192
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td147
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td1379
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td8
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td73
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td62
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp13
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp24
-rw-r--r--lib/Target/ARM/ARMTargetMachine.h3
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.cpp1
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp1143
-rw-r--r--lib/Target/ARM/AsmParser/CMakeLists.txt8
-rw-r--r--lib/Target/ARM/AsmParser/LLVMBuild.txt1
-rw-r--r--lib/Target/ARM/CMakeLists.txt14
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassembler.cpp171
-rw-r--r--lib/Target/ARM/Disassembler/CMakeLists.txt8
-rw-r--r--lib/Target/ARM/Disassembler/LLVMBuild.txt1
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp26
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.h6
-rw-r--r--lib/Target/ARM/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/ARM/InstPrinter/LLVMBuild.txt1
-rw-r--r--lib/Target/ARM/LLVMBuild.txt4
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp46
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp9
-rw-r--r--lib/Target/ARM/MCTargetDesc/CMakeLists.txt7
-rw-r--r--lib/Target/ARM/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/ARM/MLxExpansionPass.cpp8
-rw-r--r--lib/Target/ARM/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/ARM/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.cpp7
-rw-r--r--lib/Target/ARM/Thumb2ITBlockPass.cpp8
-rw-r--r--lib/Target/ARM/Thumb2SizeReduction.cpp46
-rw-r--r--lib/Target/CBackend/CMakeLists.txt12
-rw-r--r--lib/Target/CBackend/CTargetMachine.h4
-rw-r--r--lib/Target/CBackend/LLVMBuild.txt4
-rw-r--r--lib/Target/CBackend/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/CBackend/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/CMakeLists.txt7
-rw-r--r--lib/Target/CellSPU/CMakeLists.txt12
-rw-r--r--lib/Target/CellSPU/LLVMBuild.txt4
-rw-r--r--lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt5
-rw-r--r--lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/CellSPU/SPUFrameLowering.cpp3
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.cpp10
-rw-r--r--lib/Target/CellSPU/SPUTargetMachine.cpp3
-rw-r--r--lib/Target/CellSPU/SPUTargetMachine.h2
-rw-r--r--lib/Target/CellSPU/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/CellSPU/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/CppBackend/CMakeLists.txt7
-rw-r--r--lib/Target/CppBackend/CPPTargetMachine.h4
-rw-r--r--lib/Target/CppBackend/LLVMBuild.txt4
-rw-r--r--lib/Target/CppBackend/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/CppBackend/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/Hexagon/CMakeLists.txt35
-rw-r--r--lib/Target/Hexagon/Hexagon.h54
-rw-r--r--lib/Target/Hexagon/Hexagon.td66
-rw-r--r--lib/Target/Hexagon/HexagonAsmPrinter.cpp555
-rw-r--r--lib/Target/Hexagon/HexagonCFGOptimizer.cpp240
-rw-r--r--lib/Target/Hexagon/HexagonCallingConv.td35
-rw-r--r--lib/Target/Hexagon/HexagonCallingConvLower.cpp207
-rw-r--r--lib/Target/Hexagon/HexagonCallingConvLower.h189
-rw-r--r--lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp184
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.cpp333
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.h50
-rw-r--r--lib/Target/Hexagon/HexagonHardwareLoops.cpp644
-rw-r--r--lib/Target/Hexagon/HexagonISelDAGToDAG.cpp1495
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.cpp1505
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.h162
-rw-r--r--lib/Target/Hexagon/HexagonImmediates.td491
-rw-r--r--lib/Target/Hexagon/HexagonInstrFormats.td242
-rw-r--r--lib/Target/Hexagon/HexagonInstrFormatsV4.td46
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.cpp1459
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.h166
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.td3014
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfoV3.td134
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfoV4.td3392
-rw-r--r--lib/Target/Hexagon/HexagonIntrinsics.td3462
-rw-r--r--lib/Target/Hexagon/HexagonIntrinsicsDerived.td29
-rw-r--r--lib/Target/Hexagon/HexagonIntrinsicsV3.td50
-rw-r--r--lib/Target/Hexagon/HexagonIntrinsicsV4.td369
-rw-r--r--lib/Target/Hexagon/HexagonMachineFunctionInfo.h75
-rw-r--r--lib/Target/Hexagon/HexagonOptimizeSZExtends.cpp129
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.cpp323
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.h89
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.td169
-rw-r--r--lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp85
-rw-r--r--lib/Target/Hexagon/HexagonSchedule.td53
-rw-r--r--lib/Target/Hexagon/HexagonScheduleV4.td56
-rw-r--r--lib/Target/Hexagon/HexagonSelectCCInfo.td121
-rw-r--r--lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp46
-rw-r--r--lib/Target/Hexagon/HexagonSelectionDAGInfo.h40
-rw-r--r--lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp136
-rw-r--r--lib/Target/Hexagon/HexagonSubtarget.cpp59
-rw-r--r--lib/Target/Hexagon/HexagonSubtarget.h74
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.cpp118
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.h86
-rw-r--r--lib/Target/Hexagon/HexagonTargetObjectFile.cpp94
-rw-r--r--lib/Target/Hexagon/HexagonTargetObjectFile.h40
-rw-r--r--lib/Target/Hexagon/HexagonVarargsCallingConvention.h141
-rw-r--r--lib/Target/Hexagon/LLVMBuild.txt32
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt6
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp36
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h30
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp94
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h40
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt23
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/Makefile16
-rw-r--r--lib/Target/Hexagon/Makefile23
-rw-r--r--lib/Target/Hexagon/TargetInfo/CMakeLists.txt8
-rw-r--r--lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp19
-rw-r--r--lib/Target/Hexagon/TargetInfo/LLVMBuild.txt23
-rw-r--r--lib/Target/Hexagon/TargetInfo/Makefile15
-rw-r--r--lib/Target/LLVMBuild.txt3
-rw-r--r--lib/Target/MBlaze/AsmParser/CMakeLists.txt7
-rw-r--r--lib/Target/MBlaze/AsmParser/LLVMBuild.txt1
-rw-r--r--lib/Target/MBlaze/CMakeLists.txt13
-rw-r--r--lib/Target/MBlaze/Disassembler/CMakeLists.txt7
-rw-r--r--lib/Target/MBlaze/Disassembler/LLVMBuild.txt1
-rw-r--r--lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp18
-rw-r--r--lib/Target/MBlaze/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/MBlaze/InstPrinter/LLVMBuild.txt1
-rw-r--r--lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h2
-rw-r--r--lib/Target/MBlaze/LLVMBuild.txt4
-rw-r--r--lib/Target/MBlaze/MBlazeAsmPrinter.cpp4
-rw-r--r--lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp22
-rw-r--r--lib/Target/MBlaze/MBlazeFrameLowering.cpp17
-rw-r--r--lib/Target/MBlaze/MBlazeISelLowering.cpp2
-rw-r--r--lib/Target/MBlaze/MBlazeInstrFormats.td24
-rw-r--r--lib/Target/MBlaze/MBlazeInstrInfo.td169
-rw-r--r--lib/Target/MBlaze/MBlazeMCInstLower.cpp2
-rw-r--r--lib/Target/MBlaze/MBlazeTargetMachine.cpp18
-rw-r--r--lib/Target/MBlaze/MBlazeTargetMachine.h1
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt7
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp17
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h1
-rw-r--r--lib/Target/MBlaze/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/MBlaze/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/MSP430/CMakeLists.txt13
-rw-r--r--lib/Target/MSP430/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/MSP430/InstPrinter/LLVMBuild.txt1
-rw-r--r--lib/Target/MSP430/LLVMBuild.txt4
-rw-r--r--lib/Target/MSP430/MCTargetDesc/CMakeLists.txt8
-rw-r--r--lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/MSP430/MSP430FrameLowering.cpp4
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.cpp4
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.cpp9
-rw-r--r--lib/Target/MSP430/MSP430TargetMachine.cpp3
-rw-r--r--lib/Target/MSP430/MSP430TargetMachine.h2
-rw-r--r--lib/Target/MSP430/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/MSP430/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/Mips/CMakeLists.txt13
-rw-r--r--lib/Target/Mips/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/Mips/InstPrinter/LLVMBuild.txt1
-rw-r--r--lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp4
-rw-r--r--lib/Target/Mips/LLVMBuild.txt4
-rw-r--r--lib/Target/Mips/MCTargetDesc/CMakeLists.txt7
-rw-r--r--lib/Target/Mips/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp85
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h11
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h102
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp15
-rw-r--r--lib/Target/Mips/Mips.td6
-rw-r--r--lib/Target/Mips/Mips64InstrInfo.td48
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.cpp20
-rw-r--r--lib/Target/Mips/MipsCodeEmitter.cpp4
-rw-r--r--lib/Target/Mips/MipsDelaySlotFiller.cpp17
-rw-r--r--lib/Target/Mips/MipsFrameLowering.cpp4
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.cpp47
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp192
-rw-r--r--lib/Target/Mips/MipsISelLowering.h9
-rw-r--r--lib/Target/Mips/MipsInstrFormats.td2
-rw-r--r--lib/Target/Mips/MipsInstrInfo.cpp17
-rw-r--r--lib/Target/Mips/MipsInstrInfo.h1
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td161
-rw-r--r--lib/Target/Mips/MipsMCInstLower.cpp37
-rw-r--r--lib/Target/Mips/MipsMCInstLower.h2
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.cpp1
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.td3
-rw-r--r--lib/Target/Mips/MipsSubtarget.cpp2
-rw-r--r--lib/Target/Mips/MipsTargetMachine.cpp53
-rw-r--r--lib/Target/Mips/MipsTargetMachine.h8
-rw-r--r--lib/Target/Mips/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/Mips/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/PTX/CMakeLists.txt14
-rw-r--r--lib/Target/PTX/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/PTX/InstPrinter/LLVMBuild.txt1
-rw-r--r--lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp47
-rw-r--r--lib/Target/PTX/LLVMBuild.txt4
-rw-r--r--lib/Target/PTX/MCTargetDesc/CMakeLists.txt7
-rw-r--r--lib/Target/PTX/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h71
-rw-r--r--lib/Target/PTX/PTXAsmPrinter.cpp69
-rw-r--r--lib/Target/PTX/PTXFPRoundingModePass.cpp6
-rw-r--r--lib/Target/PTX/PTXISelLowering.cpp35
-rw-r--r--lib/Target/PTX/PTXInstrInfo.cpp17
-rw-r--r--lib/Target/PTX/PTXInstrInfo.td12
-rw-r--r--lib/Target/PTX/PTXMFInfoExtract.cpp21
-rw-r--r--lib/Target/PTX/PTXMachineFunctionInfo.h154
-rw-r--r--lib/Target/PTX/PTXTargetMachine.cpp24
-rw-r--r--lib/Target/PTX/PTXTargetMachine.h6
-rw-r--r--lib/Target/PTX/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/PTX/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/PowerPC/CMakeLists.txt14
-rw-r--r--lib/Target/PowerPC/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/PowerPC/InstPrinter/LLVMBuild.txt1
-rw-r--r--lib/Target/PowerPC/LLVMBuild.txt4
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt7
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp10
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp5
-rw-r--r--lib/Target/PowerPC/PPCCodeEmitter.cpp6
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp18
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.cpp179
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.h13
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp6
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp39
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td18
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp120
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h5
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td13
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp130
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.h2
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp20
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h4
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp11
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.h6
-rw-r--r--lib/Target/PowerPC/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/PowerPC/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/Sparc/CMakeLists.txt12
-rw-r--r--lib/Target/Sparc/DelaySlotFiller.cpp16
-rw-r--r--lib/Target/Sparc/LLVMBuild.txt4
-rw-r--r--lib/Target/Sparc/MCTargetDesc/CMakeLists.txt6
-rw-r--r--lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/Sparc/SparcAsmPrinter.cpp4
-rw-r--r--lib/Target/Sparc/SparcISelLowering.cpp2
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.cpp4
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.cpp15
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.h6
-rw-r--r--lib/Target/Sparc/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/Sparc/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/TargetFrameLowering.cpp45
-rw-r--r--lib/Target/TargetInstrInfo.cpp42
-rw-r--r--lib/Target/TargetLibraryInfo.cpp91
-rw-r--r--lib/Target/TargetLoweringObjectFile.cpp6
-rw-r--r--lib/Target/TargetMachine.cpp192
-rw-r--r--lib/Target/TargetRegisterInfo.cpp2
-rw-r--r--lib/Target/X86/AsmParser/CMakeLists.txt8
-rw-r--r--lib/Target/X86/AsmParser/LLVMBuild.txt1
-rw-r--r--lib/Target/X86/CMakeLists.txt15
-rw-r--r--lib/Target/X86/Disassembler/CMakeLists.txt6
-rw-r--r--lib/Target/X86/Disassembler/LLVMBuild.txt1
-rw-r--r--lib/Target/X86/InstPrinter/CMakeLists.txt6
-rw-r--r--lib/Target/X86/InstPrinter/LLVMBuild.txt1
-rw-r--r--lib/Target/X86/InstPrinter/X86InstComments.cpp244
-rw-r--r--lib/Target/X86/LLVMBuild.txt4
-rw-r--r--lib/Target/X86/MCTargetDesc/CMakeLists.txt7
-rw-r--r--lib/Target/X86/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp13
-rw-r--r--lib/Target/X86/MCTargetDesc/X86BaseInfo.h20
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp14
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h8
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp99
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp6
-rw-r--r--lib/Target/X86/README-SSE.txt13
-rw-r--r--lib/Target/X86/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/X86/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/X86/Utils/CMakeLists.txt5
-rw-r--r--lib/Target/X86/Utils/LLVMBuild.txt1
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.cpp110
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.h47
-rw-r--r--lib/Target/X86/X86.td18
-rw-r--r--lib/Target/X86/X86CallingConv.td13
-rw-r--r--lib/Target/X86/X86CodeEmitter.cpp2
-rw-r--r--lib/Target/X86/X86FastISel.cpp10
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp145
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp900
-rw-r--r--lib/Target/X86/X86ISelLowering.h25
-rw-r--r--lib/Target/X86/X86InstrFMA.td388
-rw-r--r--lib/Target/X86/X86InstrFormats.td37
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td30
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp37
-rw-r--r--lib/Target/X86/X86InstrInfo.td5
-rw-r--r--lib/Target/X86/X86InstrSSE.td801
-rw-r--r--lib/Target/X86/X86InstrXOP.td243
-rw-r--r--lib/Target/X86/X86JITInfo.cpp2
-rw-r--r--lib/Target/X86/X86MCInstLower.cpp4
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp22
-rw-r--r--lib/Target/X86/X86Subtarget.cpp6
-rw-r--r--lib/Target/X86/X86Subtarget.h4
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp18
-rw-r--r--lib/Target/X86/X86TargetMachine.h6
-rw-r--r--lib/Target/X86/X86VZeroUpper.cpp2
-rw-r--r--lib/Target/XCore/CMakeLists.txt12
-rw-r--r--lib/Target/XCore/LLVMBuild.txt4
-rw-r--r--lib/Target/XCore/MCTargetDesc/CMakeLists.txt5
-rw-r--r--lib/Target/XCore/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/XCore/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/XCore/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/XCore/XCore.h3
-rw-r--r--lib/Target/XCore/XCoreFrameLowering.cpp3
-rw-r--r--lib/Target/XCore/XCoreISelDAGToDAG.cpp9
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp2
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.cpp5
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.h2
319 files changed, 28184 insertions, 4263 deletions
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index bbca228..6ae287a 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -493,11 +493,21 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
return false;
}
- // These modifiers are not yet supported.
- case 'p': // The high single-precision register of a VFP double-precision
- // register.
case 'e': // The low doubleword register of a NEON quad register.
- case 'f': // The high doubleword register of a NEON quad register.
+ case 'f': { // The high doubleword register of a NEON quad register.
+ if (!MI->getOperand(OpNum).isReg())
+ return true;
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ if (!ARM::QPRRegClass.contains(Reg))
+ return true;
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ unsigned SubReg = TRI->getSubReg(Reg, ExtraCode[0] == 'e' ?
+ ARM::dsub_0 : ARM::dsub_1);
+ O << ARMInstPrinter::getRegisterName(SubReg);
+ return false;
+ }
+
+ // These modifiers are not yet supported.
case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1.
case 'H': // The highest-numbered register of a pair.
return true;
@@ -739,14 +749,14 @@ void ARMAsmPrinter::emitAttributes() {
}
// Signal various FP modes.
- if (!UnsafeFPMath) {
+ if (!TM.Options.UnsafeFPMath) {
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_denormal,
ARMBuildAttrs::Allowed);
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
ARMBuildAttrs::Allowed);
}
- if (NoInfsFPMath && NoNaNsFPMath)
+ if (TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath)
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
ARMBuildAttrs::Allowed);
else
@@ -759,7 +769,7 @@ void ARMAsmPrinter::emitAttributes() {
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1);
// Hard float. Use both S and D registers and conform to AAPCS-VFP.
- if (Subtarget->isAAPCS_ABI() && FloatABIType == FloatABI::Hard) {
+ if (Subtarget->isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard) {
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_HardFP_use, 3);
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_VFP_args, 1);
}
@@ -1069,7 +1079,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
}
// Try to figure out the unwinding opcode out of src / dst regs.
- if (MI->getDesc().mayStore()) {
+ if (MI->mayStore()) {
// Register saves.
assert(DstReg == ARM::SP &&
"Only stack pointer as a destination reg is supported");
@@ -1481,11 +1491,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
/// in the function. The first operand is the ID# for this instruction, the
/// second is the index into the MachineConstantPool that this is, the third
/// is the size in bytes of this constant pool entry.
+ /// The required alignment is specified on the basic block holding this MI.
unsigned LabelId = (unsigned)MI->getOperand(0).getImm();
unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();
- EmitAlignment(2);
-
// Mark the constant pool entry as data if we're not already in a data
// region.
OutStreamer.EmitDataRegion();
@@ -1934,4 +1943,3 @@ extern "C" void LLVMInitializeARMAsmPrinter() {
RegisterAsmPrinter<ARMAsmPrinter> X(TheARMTarget);
RegisterAsmPrinter<ARMAsmPrinter> Y(TheThumbTarget);
}
-
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 9315348..8bf5475 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -146,7 +146,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
- bool isLoad = !MCID.mayStore();
+ bool isLoad = !MI->mayStore();
const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
const MachineOperand &Base = MI->getOperand(2);
const MachineOperand &Offset = MI->getOperand(NumOps-3);
@@ -439,6 +439,22 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
return false;
}
+bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
+ if (MI->isBundle()) {
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ int PIdx = I->findFirstPredOperandIdx();
+ if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
+ return true;
+ }
+ return false;
+ }
+
+ int PIdx = MI->findFirstPredOperandIdx();
+ return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
+}
+
bool ARMBaseInstrInfo::
PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const {
@@ -491,7 +507,7 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const {
// FIXME: This confuses implicit_def with optional CPSR def.
const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.getImplicitDefs() && !MCID.hasOptionalDef())
+ if (!MCID.getImplicitDefs() && !MI->hasOptionalDef())
return false;
bool Found = false;
@@ -510,11 +526,10 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
/// By default, this returns true for every instruction with a
/// PredicateOperand.
bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isPredicable())
+ if (!MI->isPredicable())
return false;
- if ((MCID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
+ if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
ARMFunctionInfo *AFI =
MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
return AFI->isThumb2Function();
@@ -548,7 +563,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
if (MI->isLabel())
return 0;
- unsigned Opc = MI->getOpcode();
+ unsigned Opc = MI->getOpcode();
switch (Opc) {
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
@@ -556,6 +571,8 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case TargetOpcode::EH_LABEL:
case TargetOpcode::DBG_VALUE:
return 0;
+ case TargetOpcode::BUNDLE:
+ return getInstBundleLength(MI);
case ARM::MOVi16_ga_pcrel:
case ARM::MOVTi16_ga_pcrel:
case ARM::t2MOVi16_ga_pcrel:
@@ -593,7 +610,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
unsigned NumOps = MCID.getNumOperands();
MachineOperand JTOP =
- MI->getOperand(NumOps - (MCID.isPredicable() ? 3 : 2));
+ MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2));
unsigned JTI = JTOP.getIndex();
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
assert(MJTI != 0);
@@ -622,6 +639,17 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
return 0; // Not reached
}
+unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
+ unsigned Size = 0;
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ assert(!I->isBundle() && "No nested bundle!");
+ Size += GetInstSizeInBytes(&*I);
+ }
+ return Size;
+}
+
void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
@@ -845,7 +873,7 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const {
const MachineMemOperand *Dummy;
- return MI->getDesc().mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
+ return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
}
void ARMBaseInstrInfo::
@@ -991,7 +1019,7 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const {
const MachineMemOperand *Dummy;
- return MI->getDesc().mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+ return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
}
bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
@@ -1357,7 +1385,7 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
return false;
// Terminators and labels can't be scheduled around.
- if (MI->getDesc().isTerminator() || MI->isLabel())
+ if (MI->isTerminator() || MI->isLabel())
return true;
// Treat the start of the IT block as a scheduling boundary, but schedule
@@ -1762,8 +1790,7 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
// Check that CPSR isn't set between the comparison instruction and the one we
// want to change.
- MachineBasicBlock::const_iterator I = CmpInstr, E = MI,
- B = MI->getParent()->begin();
+ MachineBasicBlock::iterator I = CmpInstr,E = MI, B = MI->getParent()->begin();
// Early exit if CmpInstr is at the beginning of the BB.
if (I == B) return false;
@@ -1957,7 +1984,7 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
bool isKill = UseMI->getOperand(OpIdx).isKill();
unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
- *UseMI, UseMI->getDebugLoc(),
+ UseMI, UseMI->getDebugLoc(),
get(NewUseOpc), NewReg)
.addReg(Reg1, getKillRegState(isKill))
.addImm(SOImmValV1)));
@@ -2332,6 +2359,59 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return UseCycle;
}
+static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
+ const MachineInstr *MI, unsigned Reg,
+ unsigned &DefIdx, unsigned &Dist) {
+ Dist = 0;
+
+ MachineBasicBlock::const_iterator I = MI; ++I;
+ MachineBasicBlock::const_instr_iterator II =
+ llvm::prior(I.getInstrIterator());
+ assert(II->isInsideBundle() && "Empty bundle?");
+
+ int Idx = -1;
+ while (II->isInsideBundle()) {
+ Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
+ if (Idx != -1)
+ break;
+ --II;
+ ++Dist;
+ }
+
+ assert(Idx != -1 && "Cannot find bundled definition!");
+ DefIdx = Idx;
+ return II;
+}
+
+static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
+ const MachineInstr *MI, unsigned Reg,
+ unsigned &UseIdx, unsigned &Dist) {
+ Dist = 0;
+
+ MachineBasicBlock::const_instr_iterator II = MI; ++II;
+ assert(II->isInsideBundle() && "Empty bundle?");
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+
+ // FIXME: This doesn't properly handle multiple uses.
+ int Idx = -1;
+ while (II != E && II->isInsideBundle()) {
+ Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
+ if (Idx != -1)
+ break;
+ if (II->getOpcode() != ARM::t2IT)
+ ++Dist;
+ ++II;
+ }
+
+ if (Idx == -1) {
+ Dist = 0;
+ return 0;
+ }
+
+ UseIdx = Idx;
+ return II;
+}
+
int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI, unsigned DefIdx,
@@ -2340,35 +2420,77 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
DefMI->isRegSequence() || DefMI->isImplicitDef())
return 1;
- const MCInstrDesc &DefMCID = DefMI->getDesc();
if (!ItinData || ItinData->isEmpty())
- return DefMCID.mayLoad() ? 3 : 1;
+ return DefMI->mayLoad() ? 3 : 1;
- const MCInstrDesc &UseMCID = UseMI->getDesc();
+ const MCInstrDesc *DefMCID = &DefMI->getDesc();
+ const MCInstrDesc *UseMCID = &UseMI->getDesc();
const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
- if (DefMO.getReg() == ARM::CPSR) {
+ unsigned Reg = DefMO.getReg();
+ if (Reg == ARM::CPSR) {
if (DefMI->getOpcode() == ARM::FMSTAT) {
// fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
return Subtarget.isCortexA9() ? 1 : 20;
}
// CPSR set and branch can be paired in the same cycle.
- if (UseMCID.isBranch())
+ if (UseMI->isBranch())
return 0;
+
+ // Otherwise it takes the instruction latency (generally one).
+ int Latency = getInstrLatency(ItinData, DefMI);
+
+ // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
+ // its uses. Instructions which are otherwise scheduled between them may
+ // incur a code size penalty (not able to use the CPSR setting 16-bit
+ // instructions).
+ if (Latency > 0 && Subtarget.isThumb2()) {
+ const MachineFunction *MF = DefMI->getParent()->getParent();
+ if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ --Latency;
+ }
+ return Latency;
}
unsigned DefAlign = DefMI->hasOneMemOperand()
? (*DefMI->memoperands_begin())->getAlignment() : 0;
unsigned UseAlign = UseMI->hasOneMemOperand()
? (*UseMI->memoperands_begin())->getAlignment() : 0;
- int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
- UseMCID, UseIdx, UseAlign);
+
+ unsigned DefAdj = 0;
+ if (DefMI->isBundle()) {
+ DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj);
+ if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
+ DefMI->isRegSequence() || DefMI->isImplicitDef())
+ return 1;
+ DefMCID = &DefMI->getDesc();
+ }
+ unsigned UseAdj = 0;
+ if (UseMI->isBundle()) {
+ unsigned NewUseIdx;
+ const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
+ Reg, NewUseIdx, UseAdj);
+ if (NewUseMI) {
+ UseMI = NewUseMI;
+ UseIdx = NewUseIdx;
+ UseMCID = &UseMI->getDesc();
+ }
+ }
+
+ int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign,
+ *UseMCID, UseIdx, UseAlign);
+ int Adj = DefAdj + UseAdj;
+ if (Adj) {
+ Latency -= (int)(DefAdj + UseAdj);
+ if (Latency < 1)
+ return 1;
+ }
if (Latency > 1 &&
(Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
// FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
// variants are one cycle cheaper.
- switch (DefMCID.getOpcode()) {
+ switch (DefMCID->getOpcode()) {
default: break;
case ARM::LDRrs:
case ARM::LDRBrs: {
@@ -2393,7 +2515,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
}
if (DefAlign < 8 && Subtarget.isCortexA9())
- switch (DefMCID.getOpcode()) {
+ switch (DefMCID->getOpcode()) {
default: break;
case ARM::VLD1q8:
case ARM::VLD1q16:
@@ -2413,12 +2535,18 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD2q8:
case ARM::VLD2q16:
case ARM::VLD2q32:
- case ARM::VLD2d8_UPD:
- case ARM::VLD2d16_UPD:
- case ARM::VLD2d32_UPD:
- case ARM::VLD2q8_UPD:
- case ARM::VLD2q16_UPD:
- case ARM::VLD2q32_UPD:
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2q8wb_fixed:
+ case ARM::VLD2q16wb_fixed:
+ case ARM::VLD2q32wb_fixed:
+ case ARM::VLD2d8wb_register:
+ case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_register:
+ case ARM::VLD2q8wb_register:
+ case ARM::VLD2q16wb_register:
+ case ARM::VLD2q32wb_register:
case ARM::VLD3d8:
case ARM::VLD3d16:
case ARM::VLD3d32:
@@ -2446,9 +2574,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD1DUPq8:
case ARM::VLD1DUPq16:
case ARM::VLD1DUPq32:
- case ARM::VLD1DUPq8_UPD:
- case ARM::VLD1DUPq16_UPD:
- case ARM::VLD1DUPq32_UPD:
+ case ARM::VLD1DUPq8wb_fixed:
+ case ARM::VLD1DUPq16wb_fixed:
+ case ARM::VLD1DUPq32wb_fixed:
+ case ARM::VLD1DUPq8wb_register:
+ case ARM::VLD1DUPq16wb_register:
+ case ARM::VLD1DUPq32wb_register:
case ARM::VLD2DUPd8:
case ARM::VLD2DUPd16:
case ARM::VLD2DUPd32:
@@ -2580,12 +2711,18 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD2q8Pseudo:
case ARM::VLD2q16Pseudo:
case ARM::VLD2q32Pseudo:
- case ARM::VLD2d8Pseudo_UPD:
- case ARM::VLD2d16Pseudo_UPD:
- case ARM::VLD2d32Pseudo_UPD:
- case ARM::VLD2q8Pseudo_UPD:
- case ARM::VLD2q16Pseudo_UPD:
- case ARM::VLD2q32Pseudo_UPD:
+ case ARM::VLD2d8PseudoWB_fixed:
+ case ARM::VLD2d16PseudoWB_fixed:
+ case ARM::VLD2d32PseudoWB_fixed:
+ case ARM::VLD2q8PseudoWB_fixed:
+ case ARM::VLD2q16PseudoWB_fixed:
+ case ARM::VLD2q32PseudoWB_fixed:
+ case ARM::VLD2d8PseudoWB_register:
+ case ARM::VLD2d16PseudoWB_register:
+ case ARM::VLD2d32PseudoWB_register:
+ case ARM::VLD2q8PseudoWB_register:
+ case ARM::VLD2q16PseudoWB_register:
+ case ARM::VLD2q32PseudoWB_register:
case ARM::VLD3d8Pseudo:
case ARM::VLD3d16Pseudo:
case ARM::VLD3d32Pseudo:
@@ -2621,9 +2758,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD1DUPq8Pseudo:
case ARM::VLD1DUPq16Pseudo:
case ARM::VLD1DUPq32Pseudo:
- case ARM::VLD1DUPq8Pseudo_UPD:
- case ARM::VLD1DUPq16Pseudo_UPD:
- case ARM::VLD1DUPq32Pseudo_UPD:
+ case ARM::VLD1DUPq8PseudoWB_fixed:
+ case ARM::VLD1DUPq16PseudoWB_fixed:
+ case ARM::VLD1DUPq32PseudoWB_fixed:
+ case ARM::VLD1DUPq8PseudoWB_register:
+ case ARM::VLD1DUPq16PseudoWB_register:
+ case ARM::VLD1DUPq32PseudoWB_register:
case ARM::VLD2DUPd8Pseudo:
case ARM::VLD2DUPd16Pseudo:
case ARM::VLD2DUPd32Pseudo:
@@ -2671,6 +2811,19 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return Latency;
}
+unsigned
+ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *DepMI) const {
+ unsigned Reg = DefMI->getOperand(DefIdx).getReg();
+ if (DepMI->readsRegister(Reg, &getRegisterInfo()) || !isPredicated(DepMI))
+ return 1;
+
+ // If the second MI is predicated, then there is an implicit use dependency.
+ return getOperandLatency(ItinData, DefMI, DefIdx, DepMI,
+ DepMI->getNumOperands());
+}
+
int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const {
@@ -2681,6 +2834,17 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
if (!ItinData || ItinData->isEmpty())
return 1;
+ if (MI->isBundle()) {
+ int Latency = 0;
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ if (I->getOpcode() != ARM::t2IT)
+ Latency += getInstrLatency(ItinData, I, PredCost);
+ }
+ return Latency;
+ }
+
const MCInstrDesc &MCID = MI->getDesc();
unsigned Class = MCID.getSchedClass();
unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 0f9f321..68e8208 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -69,10 +69,7 @@ public:
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
// Predication support.
- bool isPredicated(const MachineInstr *MI) const {
- int PIdx = MI->findFirstPredOperandIdx();
- return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
- }
+ bool isPredicated(const MachineInstr *MI) const;
ARMCC::CondCodes getPredicate(const MachineInstr *MI) const {
int PIdx = MI->findFirstPredOperandIdx();
@@ -213,12 +210,18 @@ public:
SDNode *DefNode, unsigned DefIdx,
SDNode *UseNode, unsigned UseIdx) const;
+ virtual unsigned getOutputLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *DepMI) const;
+
/// VFP/NEON execution domains.
std::pair<uint16_t, uint16_t>
getExecutionDomain(const MachineInstr *MI) const;
void setExecutionDomain(MachineInstr *MI, unsigned Domain) const;
private:
+ unsigned getInstBundleLength(const MachineInstr *MI) const;
+
int getVLDMDefCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &DefMCID,
unsigned DefClass,
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 7c42342..8ee6ce2 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -631,7 +631,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
// 1. Dynamic stack realignment is explicitly disabled,
// 2. This is a Thumb1 function (it's not useful, so we don't bother), or
// 3. There are VLAs in the function and the base pointer is disabled.
- return (RealignStack && !AFI->isThumb1OnlyFunction() &&
+ return (MF.getTarget().Options.RealignStack && !AFI->isThumb1OnlyFunction() &&
(!MFI->hasVarSizedObjects() || EnableBasePointer));
}
@@ -649,7 +649,7 @@ needsStackRealignment(const MachineFunction &MF) const {
bool ARMBaseRegisterInfo::
cannotEliminateFrame(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- if (DisableFramePointerElim(MF) && MFI->adjustsStack())
+ if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->adjustsStack())
return true;
return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()
|| needsStackRealignment(MF);
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index d74ccfa..365f0bb 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -401,7 +401,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB) {
MCE.StartMachineBasicBlock(MBB);
- for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
I != E; ++I)
emitInstruction(*I);
}
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 3e3a413..2039d41 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -26,6 +26,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -51,6 +52,43 @@ static cl::opt<bool>
AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
cl::desc("Adjust basic block layout to better use TB[BH]"));
+static cl::opt<bool>
+AlignConstantIslands("arm-align-constant-islands", cl::Hidden, cl::init(true),
+ cl::desc("Align constant islands in code"));
+
+/// UnknownPadding - Return the worst case padding that could result from
+/// unknown offset bits. This does not include alignment padding caused by
+/// known offset bits.
+///
+/// @param LogAlign log2(alignment)
+/// @param KnownBits Number of known low offset bits.
+static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
+ if (KnownBits < LogAlign)
+ return (1u << LogAlign) - (1u << KnownBits);
+ return 0;
+}
+
+/// WorstCaseAlign - Assuming only the low KnownBits bits in Offset are exact,
+/// add padding such that:
+///
+/// 1. The result is aligned to 1 << LogAlign.
+///
+/// 2. No other value of the unknown bits would require more padding.
+///
+/// This may add more padding than is required to satisfy just one of the
+/// constraints. It is necessary to compute alignment this way to guarantee
+/// that we don't underestimate the padding before an aligned block. If the
+/// real padding before a block is larger than we think, constant pool entries
+/// may go out of range.
+static inline unsigned WorstCaseAlign(unsigned Offset, unsigned LogAlign,
+ unsigned KnownBits) {
+ // Add the worst possible padding that the unknown bits could cause.
+ Offset += UnknownPadding(LogAlign, KnownBits);
+
+ // Then align the result.
+ return RoundUpToAlignment(Offset, 1u << LogAlign);
+}
+
namespace {
/// ARMConstantIslands - Due to limited PC-relative displacements, ARM
/// requires constant pool entries to be scattered among the instructions
@@ -64,16 +102,70 @@ namespace {
/// CPE - A constant pool entry that has been placed somewhere, which
/// tracks a list of users.
class ARMConstantIslands : public MachineFunctionPass {
- /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed
- /// by MBB Number. The two-byte pads required for Thumb alignment are
- /// counted as part of the following block (i.e., the offset and size for
- /// a padded block will both be ==2 mod 4).
- std::vector<unsigned> BBSizes;
+ /// BasicBlockInfo - Information about the offset and size of a single
+ /// basic block.
+ struct BasicBlockInfo {
+ /// Offset - Distance from the beginning of the function to the beginning
+ /// of this basic block.
+ ///
+ /// The offset is always aligned as required by the basic block.
+ unsigned Offset;
+
+ /// Size - Size of the basic block in bytes. If the block contains
+ /// inline assembly, this is a worst case estimate.
+ ///
+ /// The size does not include any alignment padding whether from the
+ /// beginning of the block, or from an aligned jump table at the end.
+ unsigned Size;
+
+ /// KnownBits - The number of low bits in Offset that are known to be
+ /// exact. The remaining bits of Offset are an upper bound.
+ uint8_t KnownBits;
+
+ /// Unalign - When non-zero, the block contains instructions (inline asm)
+ /// of unknown size. The real size may be smaller than Size bytes by a
+ /// multiple of 1 << Unalign.
+ uint8_t Unalign;
+
+ /// PostAlign - When non-zero, the block terminator contains a .align
+ /// directive, so the end of the block is aligned to 1 << PostAlign
+ /// bytes.
+ uint8_t PostAlign;
+
+ BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0),
+ PostAlign(0) {}
+
+ /// Compute the number of known offset bits internally to this block.
+ /// This number should be used to predict worst case padding when
+ /// splitting the block.
+ unsigned internalKnownBits() const {
+ return Unalign ? Unalign : KnownBits;
+ }
+
+ /// Compute the offset immediately following this block. If LogAlign is
+ /// specified, return the offset the successor block will get if it has
+ /// this alignment.
+ unsigned postOffset(unsigned LogAlign = 0) const {
+ unsigned PO = Offset + Size;
+ unsigned LA = std::max(unsigned(PostAlign), LogAlign);
+ if (!LA)
+ return PO;
+ // Add alignment padding from the terminator.
+ return WorstCaseAlign(PO, LA, internalKnownBits());
+ }
+
+ /// Compute the number of known low bits of postOffset. If this block
+ /// contains inline asm, the number of known bits drops to the
+ /// instruction alignment. An aligned terminator may increase the number
+ /// of known bits.
+ /// If LogAlign is given, also consider the alignment of the next block.
+ unsigned postKnownBits(unsigned LogAlign = 0) const {
+ return std::max(std::max(unsigned(PostAlign), LogAlign),
+ internalKnownBits());
+ }
+ };
- /// BBOffsets - the offset of each MBB in bytes, starting from 0.
- /// The two-byte pads required for Thumb alignment are counted as part of
- /// the following block.
- std::vector<unsigned> BBOffsets;
+ std::vector<BasicBlockInfo> BBInfo;
/// WaterList - A sorted list of basic blocks where islands could be placed
/// (i.e. blocks that don't fall through to the following block, due
@@ -162,9 +254,8 @@ namespace {
/// the branch fix up pass.
bool HasFarJump;
- /// HasInlineAsm - True if the function contains inline assembly.
- bool HasInlineAsm;
-
+ MachineFunction *MF;
+ MachineConstantPool *MCP;
const ARMInstrInfo *TII;
const ARMSubtarget *STI;
ARMFunctionInfo *AFI;
@@ -182,67 +273,65 @@ namespace {
}
private:
- void DoInitialPlacement(MachineFunction &MF,
- std::vector<MachineInstr*> &CPEMIs);
+ void DoInitialPlacement(std::vector<MachineInstr*> &CPEMIs);
CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
- void JumpTableFunctionScan(MachineFunction &MF);
- void InitialFunctionScan(MachineFunction &MF,
- const std::vector<MachineInstr*> &CPEMIs);
+ unsigned getCPELogAlign(const MachineInstr *CPEMI);
+ void JumpTableFunctionScan();
+ void InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs);
MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI);
void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB);
- void AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta);
+ void AdjustBBOffsetsAfter(MachineBasicBlock *BB);
bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI);
int LookForExistingCPEntry(CPUser& U, unsigned UserOffset);
bool LookForWater(CPUser&U, unsigned UserOffset, water_iterator &WaterIter);
void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset,
MachineBasicBlock *&NewMBB);
- bool HandleConstantPoolUser(MachineFunction &MF, unsigned CPUserIndex);
+ bool HandleConstantPoolUser(unsigned CPUserIndex);
void RemoveDeadCPEMI(MachineInstr *CPEMI);
bool RemoveUnusedCPEntries();
bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
MachineInstr *CPEMI, unsigned Disp, bool NegOk,
bool DoDump = false);
bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water,
- CPUser &U);
- bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
- unsigned Disp, bool NegativeOK, bool IsSoImm = false);
+ CPUser &U, unsigned &Growth);
bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
- bool FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br);
- bool FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br);
- bool FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br);
+ bool FixUpImmediateBr(ImmBranch &Br);
+ bool FixUpConditionalBr(ImmBranch &Br);
+ bool FixUpUnconditionalBr(ImmBranch &Br);
bool UndoLRSpillRestore();
- bool OptimizeThumb2Instructions(MachineFunction &MF);
- bool OptimizeThumb2Branches(MachineFunction &MF);
- bool ReorderThumb2JumpTables(MachineFunction &MF);
- bool OptimizeThumb2JumpTables(MachineFunction &MF);
+ bool OptimizeThumb2Instructions();
+ bool OptimizeThumb2Branches();
+ bool ReorderThumb2JumpTables();
+ bool OptimizeThumb2JumpTables();
MachineBasicBlock *AdjustJTTargetBlockForward(MachineBasicBlock *BB,
MachineBasicBlock *JTBB);
+ void ComputeBlockSize(MachineBasicBlock *MBB);
unsigned GetOffsetOf(MachineInstr *MI) const;
void dumpBBs();
- void verify(MachineFunction &MF);
+ void verify();
+
+ bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
+ unsigned Disp, bool NegativeOK, bool IsSoImm = false);
+ bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
+ const CPUser &U) {
+ return OffsetIsInRange(UserOffset, TrialOffset,
+ U.MaxDisp, U.NegOk, U.IsSoImm);
+ }
};
char ARMConstantIslands::ID = 0;
}
/// verify - check that BBInfo block offsets are aligned and correctly ordered
-void ARMConstantIslands::verify(MachineFunction &MF) {
- assert(BBOffsets.size() == BBSizes.size());
- for (unsigned i = 1, e = BBOffsets.size(); i != e; ++i)
- assert(BBOffsets[i-1]+BBSizes[i-1] == BBOffsets[i]);
- if (!isThumb)
- return;
+void ARMConstantIslands::verify() {
#ifndef NDEBUG
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock *MBB = MBBI;
- if (!MBB->empty() &&
- MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
- unsigned MBBId = MBB->getNumber();
- assert(HasInlineAsm ||
- (BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) ||
- (BBOffsets[MBBId]%4 != 0 && BBSizes[MBBId]%4 != 0));
- }
+ unsigned Align = MBB->getAlignment();
+ unsigned MBBId = MBB->getNumber();
+ assert(BBInfo[MBBId].Offset % (1u << Align) == 0);
+ assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
}
for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
CPUser &U = CPUsers[i];
@@ -257,10 +346,16 @@ void ARMConstantIslands::verify(MachineFunction &MF) {
/// print block size and offset information - debugging
void ARMConstantIslands::dumpBBs() {
- for (unsigned J = 0, E = BBOffsets.size(); J !=E; ++J) {
- DEBUG(errs() << "block " << J << " offset " << BBOffsets[J]
- << " size " << BBSizes[J] << "\n");
- }
+ DEBUG({
+ for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) {
+ const BasicBlockInfo &BBI = BBInfo[J];
+ dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
+ << " kb=" << unsigned(BBI.KnownBits)
+ << " ua=" << unsigned(BBI.Unalign)
+ << " pa=" << unsigned(BBI.PostAlign)
+ << format(" size=%#x\n", BBInfo[J].Size);
+ }
+ });
}
/// createARMConstantIslandPass - returns an instance of the constpool
@@ -269,34 +364,38 @@ FunctionPass *llvm::createARMConstantIslandPass() {
return new ARMConstantIslands();
}
-bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
- MachineConstantPool &MCP = *MF.getConstantPool();
+bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ MCP = mf.getConstantPool();
- TII = (const ARMInstrInfo*)MF.getTarget().getInstrInfo();
- AFI = MF.getInfo<ARMFunctionInfo>();
- STI = &MF.getTarget().getSubtarget<ARMSubtarget>();
+ DEBUG(dbgs() << "***** ARMConstantIslands: "
+ << MCP->getConstants().size() << " CP entries, aligned to "
+ << MCP->getConstantPoolAlignment() << " bytes *****\n");
+
+ TII = (const ARMInstrInfo*)MF->getTarget().getInstrInfo();
+ AFI = MF->getInfo<ARMFunctionInfo>();
+ STI = &MF->getTarget().getSubtarget<ARMSubtarget>();
isThumb = AFI->isThumbFunction();
isThumb1 = AFI->isThumb1OnlyFunction();
isThumb2 = AFI->isThumb2Function();
HasFarJump = false;
- HasInlineAsm = false;
// Renumber all of the machine basic blocks in the function, guaranteeing that
// the numbers agree with the position of the block in the function.
- MF.RenumberBlocks();
+ MF->RenumberBlocks();
// Try to reorder and otherwise adjust the block layout to make good use
// of the TB[BH] instructions.
bool MadeChange = false;
if (isThumb2 && AdjustJumpTableBlocks) {
- JumpTableFunctionScan(MF);
- MadeChange |= ReorderThumb2JumpTables(MF);
+ JumpTableFunctionScan();
+ MadeChange |= ReorderThumb2JumpTables();
// Data is out of date, so clear it. It'll be re-computed later.
T2JumpTables.clear();
// Blocks may have shifted around. Keep the numbering up to date.
- MF.RenumberBlocks();
+ MF->RenumberBlocks();
}
// Thumb1 functions containing constant pools get 4-byte alignment.
@@ -304,16 +403,13 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// ARM and Thumb2 functions need to be 4-byte aligned.
if (!isThumb1)
- MF.EnsureAlignment(2); // 2 = log2(4)
+ MF->EnsureAlignment(2); // 2 = log2(4)
// Perform the initial placement of the constant pool entries. To start with,
// we put them all at the end of the function.
std::vector<MachineInstr*> CPEMIs;
- if (!MCP.isEmpty()) {
- DoInitialPlacement(MF, CPEMIs);
- if (isThumb1)
- MF.EnsureAlignment(2); // 2 = log2(4)
- }
+ if (!MCP->isEmpty())
+ DoInitialPlacement(CPEMIs);
/// The next UID to take is the first unused one.
AFI->initPICLabelUId(CPEMIs.size());
@@ -321,7 +417,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// Do the initial scan of the function, building up information about the
// sizes of each block, the location of all the water, and finding all of the
// constant pool users.
- InitialFunctionScan(MF, CPEMIs);
+ InitialFunctionScan(CPEMIs);
CPEMIs.clear();
DEBUG(dumpBBs());
@@ -333,9 +429,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// is no change.
unsigned NoCPIters = 0, NoBRIters = 0;
while (true) {
+ DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n');
bool CPChange = false;
for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
- CPChange |= HandleConstantPoolUser(MF, i);
+ CPChange |= HandleConstantPoolUser(i);
if (CPChange && ++NoCPIters > 30)
llvm_unreachable("Constant Island pass failed to converge!");
DEBUG(dumpBBs());
@@ -344,9 +441,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// appear as "new water" for the next iteration of constant pool placement.
NewWaterList.clear();
+ DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n');
bool BRChange = false;
for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
- BRChange |= FixUpImmediateBr(MF, ImmBranches[i]);
+ BRChange |= FixUpImmediateBr(ImmBranches[i]);
if (BRChange && ++NoBRIters > 30)
llvm_unreachable("Branch Fix Up pass failed to converge!");
DEBUG(dumpBBs());
@@ -358,10 +456,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// Shrink 32-bit Thumb2 branch, load, and store instructions.
if (isThumb2 && !STI->prefers32BitThumb())
- MadeChange |= OptimizeThumb2Instructions(MF);
+ MadeChange |= OptimizeThumb2Instructions();
// After a while, this might be made debug-only, but it is not expensive.
- verify(MF);
+ verify();
// If LR has been forced spilled and no far jump (i.e. BL) has been issued,
// undo the spill / restore of LR if possible.
@@ -376,10 +474,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
}
}
- DEBUG(errs() << '\n'; dumpBBs());
+ DEBUG(dbgs() << '\n'; dumpBBs());
- BBSizes.clear();
- BBOffsets.clear();
+ BBInfo.clear();
WaterList.clear();
CPUsers.clear();
CPEntries.clear();
@@ -392,37 +489,65 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
/// DoInitialPlacement - Perform the initial placement of the constant pool
/// entries. To start with, we put them all at the end of the function.
-void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF,
- std::vector<MachineInstr*> &CPEMIs) {
+void
+ARMConstantIslands::DoInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
// Create the basic block to hold the CPE's.
- MachineBasicBlock *BB = MF.CreateMachineBasicBlock();
- MF.push_back(BB);
+ MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
+ MF->push_back(BB);
+
+ // MachineConstantPool measures alignment in bytes. We measure in log2(bytes).
+ unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment());
+
+ // Mark the basic block as required by the const-pool.
+ // If AlignConstantIslands isn't set, use 4-byte alignment for everything.
+ BB->setAlignment(AlignConstantIslands ? MaxAlign : 2);
+
+ // The function needs to be as aligned as the basic blocks. The linker may
+ // move functions around based on their alignment.
+ MF->EnsureAlignment(BB->getAlignment());
+
+ // Order the entries in BB by descending alignment. That ensures correct
+ // alignment of all entries as long as BB is sufficiently aligned. Keep
+ // track of the insertion point for each alignment (slots 0..MaxAlign). We
+ // are going to bucket sort the entries as they are created.
+ SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxAlign + 1, BB->end());
// Add all of the constants from the constant pool to the end block, use an
// identity mapping of CPI's to CPE's.
- const std::vector<MachineConstantPoolEntry> &CPs =
- MF.getConstantPool()->getConstants();
+ const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
- const TargetData &TD = *MF.getTarget().getTargetData();
+ const TargetData &TD = *MF->getTarget().getTargetData();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
- // Verify that all constant pool entries are a multiple of 4 bytes. If not,
- // we would have to pad them out or something so that instructions stay
- // aligned.
- assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!");
+ assert(Size >= 4 && "Too small constant pool entry");
+ unsigned Align = CPs[i].getAlignment();
+ assert(isPowerOf2_32(Align) && "Invalid alignment");
+ // Verify that all constant pool entries are a multiple of their alignment.
+ // If not, we would have to pad them out so that instructions stay aligned.
+ assert((Size % Align) == 0 && "CP Entry not multiple of its alignment!");
+
+ // Insert CONSTPOOL_ENTRY before entries with a smaller alignment.
+ unsigned LogAlign = Log2_32(Align);
+ MachineBasicBlock::iterator InsAt = InsPoint[LogAlign];
MachineInstr *CPEMI =
- BuildMI(BB, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
+ BuildMI(*BB, InsAt, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
.addImm(i).addConstantPoolIndex(i).addImm(Size);
CPEMIs.push_back(CPEMI);
+ // Ensure that future entries with higher alignment (up to and including
+ // MaxAlign) get inserted before CPEMI. This is bucket sort with iterators.
+ for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a)
+ if (InsPoint[a] == InsAt)
+ InsPoint[a] = CPEMI;
+
// Add a new CPEntry, but no corresponding CPUser yet.
std::vector<CPEntry> CPEs;
CPEs.push_back(CPEntry(CPEMI, i));
CPEntries.push_back(CPEs);
++NumCPEs;
- DEBUG(errs() << "Moved CPI#" << i << " to end of function as #" << i
- << "\n");
+ DEBUG(dbgs() << "Moved CPI#" << i << " to end of function\n");
}
+ DEBUG(BB->dump());
}
/// BBHasFallthrough - Return true if the specified basic block can fallthrough
@@ -458,17 +583,33 @@ ARMConstantIslands::CPEntry
return NULL;
}
+/// getCPELogAlign - Returns the required alignment of the constant pool entry
+/// represented by CPEMI. Alignment is measured in log2(bytes) units.
+unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
+ assert(CPEMI && CPEMI->getOpcode() == ARM::CONSTPOOL_ENTRY);
+
+ // Everything is 4-byte aligned unless AlignConstantIslands is set.
+ if (!AlignConstantIslands)
+ return 2;
+
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
+ unsigned Align = MCP->getConstants()[CPI].getAlignment();
+ assert(isPowerOf2_32(Align) && "Invalid CPE alignment");
+ return Log2_32(Align);
+}
+
/// JumpTableFunctionScan - Do a scan of the function, building up
/// information about the sizes of each block and the locations of all
/// the jump tables.
-void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) {
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+void ARMConstantIslands::JumpTableFunctionScan() {
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock &MBB = *MBBI;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I)
- if (I->getDesc().isBranch() && I->getOpcode() == ARM::t2BR_JT)
+ if (I->isBranch() && I->getOpcode() == ARM::t2BR_JT)
T2JumpTables.push_back(I);
}
}
@@ -476,23 +617,27 @@ void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) {
/// InitialFunctionScan - Do the initial scan of the function, building up
/// information about the sizes of each block, the location of all the water,
/// and finding all of the constant pool users.
-void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
- const std::vector<MachineInstr*> &CPEMIs) {
- // First thing, see if the function has any inline assembly in it. If so,
- // we have to be conservative about alignment assumptions, as we don't
- // know for sure the size of any instructions in the inline assembly.
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
- MBBI != E; ++MBBI) {
- MachineBasicBlock &MBB = *MBBI;
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I)
- if (I->getOpcode() == ARM::INLINEASM)
- HasInlineAsm = true;
- }
+void ARMConstantIslands::
+InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs) {
+ BBInfo.clear();
+ BBInfo.resize(MF->getNumBlockIDs());
+
+ // First thing, compute the size of all basic blocks, and see if the function
+ // has any inline assembly in it. If so, we have to be conservative about
+ // alignment assumptions, as we don't know for sure the size of any
+ // instructions in the inline assembly.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
+ ComputeBlockSize(I);
+
+ // The known bits of the entry block offset are determined by the function
+ // alignment.
+ BBInfo.front().KnownBits = MF->getAlignment();
+
+ // Compute block offsets and known bits.
+ AdjustBBOffsetsAfter(MF->begin());
// Now go back through the instructions and build up our data structures.
- unsigned Offset = 0;
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock &MBB = *MBBI;
@@ -501,16 +646,13 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
if (!BBHasFallthrough(&MBB))
WaterList.push_back(&MBB);
- unsigned MBBSize = 0;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
if (I->isDebugValue())
continue;
- // Add instruction size to MBBSize.
- MBBSize += TII->GetInstSizeInBytes(I);
int Opc = I->getOpcode();
- if (I->getDesc().isBranch()) {
+ if (I->isBranch()) {
bool isCond = false;
unsigned Bits = 0;
unsigned Scale = 1;
@@ -518,18 +660,6 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
switch (Opc) {
default:
continue; // Ignore other JT branches
- case ARM::tBR_JTr:
- // A Thumb1 table jump may involve padding; for the offsets to
- // be right, functions containing these must be 4-byte aligned.
- // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
- // table entries. So this code checks whether offset of tBR_JTr + 2
- // is aligned. That is held in Offset+MBBSize, which already has
- // 2 added in for the size of the mov pc instruction.
- MF.EnsureAlignment(2U);
- if ((Offset+MBBSize)%4 != 0 || HasInlineAsm)
- // FIXME: Add a pseudo ALIGN instruction instead.
- MBBSize += 2; // padding
- continue; // Does not get an entry in ImmBranches
case ARM::t2BR_JT:
T2JumpTables.push_back(I);
continue; // Does not get an entry in ImmBranches
@@ -647,18 +777,30 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
break;
}
}
+ }
+}
- // In thumb mode, if this block is a constpool island, we may need padding
- // so it's aligned on 4 byte boundary.
- if (isThumb &&
- !MBB.empty() &&
- MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY &&
- ((Offset%4) != 0 || HasInlineAsm))
- MBBSize += 2;
-
- BBSizes.push_back(MBBSize);
- BBOffsets.push_back(Offset);
- Offset += MBBSize;
+/// ComputeBlockSize - Compute the size and some alignment information for MBB.
+/// This function updates BBInfo directly.
+void ARMConstantIslands::ComputeBlockSize(MachineBasicBlock *MBB) {
+ BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
+ BBI.Size = 0;
+ BBI.Unalign = 0;
+ BBI.PostAlign = 0;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
+ BBI.Size += TII->GetInstSizeInBytes(I);
+ // For inline asm, GetInstSizeInBytes returns a conservative estimate.
+ // The actual size may be smaller, but still a multiple of the instr size.
+ if (I->isInlineAsm())
+ BBI.Unalign = isThumb ? 1 : 2;
+ }
+
+ // tBR_JTr contains a .align 2 directive.
+ if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) {
+ BBI.PostAlign = 2;
+ MBB->getParent()->EnsureAlignment(2);
}
}
@@ -671,14 +813,7 @@ unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const {
// The offset is composed of two things: the sum of the sizes of all MBB's
// before this instruction's block, and the offset from the start of the block
// it is in.
- unsigned Offset = BBOffsets[MBB->getNumber()];
-
- // If we're looking for a CONSTPOOL_ENTRY in Thumb, see if this block has
- // alignment padding, and compensate if so.
- if (isThumb &&
- MI->getOpcode() == ARM::CONSTPOOL_ENTRY &&
- (Offset%4 != 0 || HasInlineAsm))
- Offset += 2;
+ unsigned Offset = BBInfo[MBB->getNumber()].Offset;
// Sum instructions before MI in MBB.
for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) {
@@ -702,12 +837,9 @@ void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
// Renumber the MBB's to keep them consecutive.
NewBB->getParent()->RenumberBlocks(NewBB);
- // Insert a size into BBSizes to align it properly with the (newly
+ // Insert an entry into BBInfo to align it properly with the (newly
// renumbered) block numbers.
- BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
-
- // Likewise for BBOffsets.
- BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
// Next, update WaterList. Specifically, we need to add NewMBB as having
// available water after it.
@@ -723,13 +855,12 @@ void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
/// account for this change and returns the newly created block.
MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
MachineBasicBlock *OrigBB = MI->getParent();
- MachineFunction &MF = *OrigBB->getParent();
// Create a new MBB for the code after the OrigBB.
MachineBasicBlock *NewBB =
- MF.CreateMachineBasicBlock(OrigBB->getBasicBlock());
+ MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
MachineFunction::iterator MBBI = OrigBB; ++MBBI;
- MF.insert(MBBI, NewBB);
+ MF->insert(MBBI, NewBB);
// Splice the instructions starting with MI over to NewBB.
NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
@@ -747,16 +878,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
++NumSplit;
// Update the CFG. All succs of OrigBB are now succs of NewBB.
- while (!OrigBB->succ_empty()) {
- MachineBasicBlock *Succ = *OrigBB->succ_begin();
- OrigBB->removeSuccessor(Succ);
- NewBB->addSuccessor(Succ);
-
- // This pass should be run after register allocation, so there should be no
- // PHI nodes to update.
- assert((Succ->empty() || !Succ->begin()->isPHI())
- && "PHI nodes should be eliminated by now!");
- }
+ NewBB->transferSuccessors(OrigBB);
// OrigBB branches to NewBB.
OrigBB->addSuccessor(NewBB);
@@ -764,14 +886,11 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
// Update internal data structures to account for the newly inserted MBB.
// This is almost the same as UpdateForInsertedWaterBlock, except that
// the Water goes after OrigBB, not NewBB.
- MF.RenumberBlocks(NewBB);
+ MF->RenumberBlocks(NewBB);
- // Insert a size into BBSizes to align it properly with the (newly
+ // Insert an entry into BBInfo to align it properly with the (newly
// renumbered) block numbers.
- BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
-
- // Likewise for BBOffsets.
- BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
// Next, update WaterList. Specifically, we need to add OrigMBB as having
// available water after it (but not if it's already there, which happens
@@ -787,54 +906,19 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
WaterList.insert(IP, OrigBB);
NewWaterList.insert(OrigBB);
- unsigned OrigBBI = OrigBB->getNumber();
- unsigned NewBBI = NewBB->getNumber();
-
- int delta = isThumb1 ? 2 : 4;
-
// Figure out how large the OrigBB is. As the first half of the original
// block, it cannot contain a tablejump. The size includes
// the new jump we added. (It should be possible to do this without
// recounting everything, but it's very confusing, and this is rarely
// executed.)
- unsigned OrigBBSize = 0;
- for (MachineBasicBlock::iterator I = OrigBB->begin(), E = OrigBB->end();
- I != E; ++I)
- OrigBBSize += TII->GetInstSizeInBytes(I);
- BBSizes[OrigBBI] = OrigBBSize;
-
- // ...and adjust BBOffsets for NewBB accordingly.
- BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI];
+ ComputeBlockSize(OrigBB);
// Figure out how large the NewMBB is. As the second half of the original
// block, it may contain a tablejump.
- unsigned NewBBSize = 0;
- for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
- I != E; ++I)
- NewBBSize += TII->GetInstSizeInBytes(I);
- // Set the size of NewBB in BBSizes. It does not include any padding now.
- BBSizes[NewBBI] = NewBBSize;
-
- MachineInstr* ThumbJTMI = prior(NewBB->end());
- if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
- // We've added another 2-byte instruction before this tablejump, which
- // means we will always need padding if we didn't before, and vice versa.
-
- // The original offset of the jump instruction was:
- unsigned OrigOffset = BBOffsets[OrigBBI] + BBSizes[OrigBBI] - delta;
- if (OrigOffset%4 == 0) {
- // We had padding before and now we don't. No net change in code size.
- delta = 0;
- } else {
- // We didn't have padding before and now we do.
- BBSizes[NewBBI] += 2;
- delta = 4;
- }
- }
+ ComputeBlockSize(NewBB);
// All BBOffsets following these blocks must be modified.
- if (delta)
- AdjustBBOffsetsAfter(NewBB, delta);
+ AdjustBBOffsetsAfter(OrigBB);
return NewBB;
}
@@ -882,19 +966,44 @@ bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset,
/// WaterIsInRange - Returns true if a CPE placed after the specified
/// Water (a basic block) will be in range for the specific MI.
-
+///
+/// Compute how much the function will grow by inserting a CPE after Water.
bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset,
- MachineBasicBlock* Water, CPUser &U) {
- unsigned MaxDisp = U.MaxDisp;
- unsigned CPEOffset = BBOffsets[Water->getNumber()] +
- BBSizes[Water->getNumber()];
-
- // If the CPE is to be inserted before the instruction, that will raise
- // the offset of the instruction.
- if (CPEOffset < UserOffset)
- UserOffset += U.CPEMI->getOperand(2).getImm();
-
- return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, U.NegOk, U.IsSoImm);
+ MachineBasicBlock* Water, CPUser &U,
+ unsigned &Growth) {
+ unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
+ unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
+ unsigned NextBlockOffset, NextBlockAlignment;
+ MachineFunction::const_iterator NextBlock = Water;
+ if (++NextBlock == MF->end()) {
+ NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
+ NextBlockAlignment = 0;
+ } else {
+ NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset;
+ NextBlockAlignment = NextBlock->getAlignment();
+ }
+ unsigned Size = U.CPEMI->getOperand(2).getImm();
+ unsigned CPEEnd = CPEOffset + Size;
+
+ // The CPE may be able to hide in the alignment padding before the next
+ // block. It may also cause more padding to be required if it is more aligned
+ // than the next block.
+ if (CPEEnd > NextBlockOffset) {
+ Growth = CPEEnd - NextBlockOffset;
+ // Compute the padding that would go at the end of the CPE to align the next
+ // block.
+ Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment);
+
+ // If the CPE is to be inserted before the instruction, that will raise
+ // the offset of the instruction. Also account for unknown alignment padding
+ // in blocks between CPE and the user.
+ if (CPEOffset < UserOffset)
+ UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign);
+ } else
+ // CPE fits in existing padding.
+ Growth = 0;
+
+ return OffsetIsInRange(UserOffset, CPEOffset, U);
}
/// CPEIsInRange - Returns true if the distance between specific MI and
@@ -903,14 +1012,20 @@ bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
MachineInstr *CPEMI, unsigned MaxDisp,
bool NegOk, bool DoDump) {
unsigned CPEOffset = GetOffsetOf(CPEMI);
- assert((CPEOffset%4 == 0 || HasInlineAsm) && "Misaligned CPE");
+ assert(CPEOffset % 4 == 0 && "Misaligned CPE");
if (DoDump) {
- DEBUG(errs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
- << " max delta=" << MaxDisp
- << " insn address=" << UserOffset
- << " CPE address=" << CPEOffset
- << " offset=" << int(CPEOffset-UserOffset) << "\t" << *MI);
+ DEBUG({
+ unsigned Block = MI->getParent()->getNumber();
+ const BasicBlockInfo &BBI = BBInfo[Block];
+ dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
+ << " max delta=" << MaxDisp
+ << format(" insn address=%#x", UserOffset)
+ << " in BB#" << Block << ": "
+ << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI
+ << format("CPE address=%#x offset=%+d: ", CPEOffset,
+ int(CPEOffset-UserOffset));
+ });
}
return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
@@ -933,55 +1048,17 @@ static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
}
#endif // NDEBUG
-void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
- int delta) {
- MachineFunction::iterator MBBI = BB; MBBI = llvm::next(MBBI);
- for(unsigned i = BB->getNumber()+1, e = BB->getParent()->getNumBlockIDs();
- i < e; ++i) {
- BBOffsets[i] += delta;
- // If some existing blocks have padding, adjust the padding as needed, a
- // bit tricky. delta can be negative so don't use % on that.
- if (!isThumb)
- continue;
- MachineBasicBlock *MBB = MBBI;
- if (!MBB->empty() && !HasInlineAsm) {
- // Constant pool entries require padding.
- if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
- unsigned OldOffset = BBOffsets[i] - delta;
- if ((OldOffset%4) == 0 && (BBOffsets[i]%4) != 0) {
- // add new padding
- BBSizes[i] += 2;
- delta += 2;
- } else if ((OldOffset%4) != 0 && (BBOffsets[i]%4) == 0) {
- // remove existing padding
- BBSizes[i] -= 2;
- delta -= 2;
- }
- }
- // Thumb1 jump tables require padding. They should be at the end;
- // following unconditional branches are removed by AnalyzeBranch.
- // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
- // table entries. So this code checks whether offset of tBR_JTr
- // is aligned; if it is, the offset of the jump table following the
- // instruction will not be aligned, and we need padding.
- MachineInstr *ThumbJTMI = prior(MBB->end());
- if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
- unsigned NewMIOffset = GetOffsetOf(ThumbJTMI);
- unsigned OldMIOffset = NewMIOffset - delta;
- if ((OldMIOffset%4) == 0 && (NewMIOffset%4) != 0) {
- // remove existing padding
- BBSizes[i] -= 2;
- delta -= 2;
- } else if ((OldMIOffset%4) != 0 && (NewMIOffset%4) == 0) {
- // add new padding
- BBSizes[i] += 2;
- delta += 2;
- }
- }
- if (delta==0)
- return;
- }
- MBBI = llvm::next(MBBI);
+void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB) {
+ for(unsigned i = BB->getNumber() + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
+ // Get the offset and known bits at the end of the layout predecessor.
+ // Include the alignment of the current block.
+ unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
+ unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
+ unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
+
+ // This is where block i begins.
+ BBInfo[i].Offset = Offset;
+ BBInfo[i].KnownBits = KnownBits;
}
}
@@ -1016,7 +1093,7 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
// Check to see if the CPE is already in-range.
if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, U.NegOk, true)) {
- DEBUG(errs() << "In range\n");
+ DEBUG(dbgs() << "In range\n");
return 1;
}
@@ -1031,7 +1108,7 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
if (CPEs[i].CPEMI == NULL)
continue;
if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, U.NegOk)) {
- DEBUG(errs() << "Replacing CPE#" << CPI << " with CPE#"
+ DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#"
<< CPEs[i].CPI << "\n");
// Point the CPUser node to the replacement
U.CPEMI = CPEs[i].CPEMI;
@@ -1079,10 +1156,9 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
if (WaterList.empty())
return false;
- bool FoundWaterThatWouldPad = false;
- water_iterator IPThatWouldPad;
- for (water_iterator IP = prior(WaterList.end()),
- B = WaterList.begin();; --IP) {
+ unsigned BestGrowth = ~0u;
+ for (water_iterator IP = prior(WaterList.end()), B = WaterList.begin();;
+ --IP) {
MachineBasicBlock* WaterBB = *IP;
// Check if water is in range and is either at a lower address than the
// current "high water mark" or a new water block that was created since
@@ -1092,31 +1168,24 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
// should be relatively uncommon and when it does happen, we want to be
// sure to take advantage of it for all the CPEs near that block, so that
// we don't insert more branches than necessary.
- if (WaterIsInRange(UserOffset, WaterBB, U) &&
+ unsigned Growth;
+ if (WaterIsInRange(UserOffset, WaterBB, U, Growth) &&
(WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
- NewWaterList.count(WaterBB))) {
- unsigned WBBId = WaterBB->getNumber();
- if (isThumb &&
- (BBOffsets[WBBId] + BBSizes[WBBId])%4 != 0) {
- // This is valid Water, but would introduce padding. Remember
- // it in case we don't find any Water that doesn't do this.
- if (!FoundWaterThatWouldPad) {
- FoundWaterThatWouldPad = true;
- IPThatWouldPad = IP;
- }
- } else {
- WaterIter = IP;
+ NewWaterList.count(WaterBB)) && Growth < BestGrowth) {
+ // This is the least amount of required padding seen so far.
+ BestGrowth = Growth;
+ WaterIter = IP;
+ DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber()
+ << " Growth=" << Growth << '\n');
+
+ // Keep looking unless it is perfect.
+ if (BestGrowth == 0)
return true;
- }
}
if (IP == B)
break;
}
- if (FoundWaterThatWouldPad) {
- WaterIter = IPThatWouldPad;
- return true;
- }
- return false;
+ return BestGrowth != ~0u;
}
/// CreateNewWater - No existing WaterList entry will work for
@@ -1132,114 +1201,143 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
CPUser &U = CPUsers[CPUserIndex];
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
+ unsigned CPELogAlign = getCPELogAlign(CPEMI);
MachineBasicBlock *UserMBB = UserMI->getParent();
- unsigned OffsetOfNextBlock = BBOffsets[UserMBB->getNumber()] +
- BBSizes[UserMBB->getNumber()];
- assert(OffsetOfNextBlock== BBOffsets[UserMBB->getNumber()+1]);
+ const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()];
// If the block does not end in an unconditional branch already, and if the
// end of the block is within range, make new water there. (The addition
// below is for the unconditional branch we will be adding: 4 bytes on ARM +
// Thumb2, 2 on Thumb1. Possible Thumb1 alignment padding is allowed for
// inside OffsetIsInRange.
- if (BBHasFallthrough(UserMBB) &&
- OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb1 ? 2: 4),
- U.MaxDisp, U.NegOk, U.IsSoImm)) {
- DEBUG(errs() << "Split at end of block\n");
- if (&UserMBB->back() == UserMI)
- assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!");
- NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
- // Add an unconditional branch from UserMBB to fallthrough block.
- // Record it for branch lengthening; this new branch will not get out of
- // range, but if the preceding conditional branch is out of range, the
- // targets will be exchanged, and the altered branch may be out of
- // range, so the machinery has to know about it.
- int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
- if (!isThumb)
- BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
- else
- BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB)
- .addImm(ARMCC::AL).addReg(0);
- unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
- ImmBranches.push_back(ImmBranch(&UserMBB->back(),
- MaxDisp, false, UncondBr));
- int delta = isThumb1 ? 2 : 4;
- BBSizes[UserMBB->getNumber()] += delta;
- AdjustBBOffsetsAfter(UserMBB, delta);
- } else {
- // What a big block. Find a place within the block to split it.
- // This is a little tricky on Thumb1 since instructions are 2 bytes
- // and constant pool entries are 4 bytes: if instruction I references
- // island CPE, and instruction I+1 references CPE', it will
- // not work well to put CPE as far forward as possible, since then
- // CPE' cannot immediately follow it (that location is 2 bytes
- // farther away from I+1 than CPE was from I) and we'd need to create
- // a new island. So, we make a first guess, then walk through the
- // instructions between the one currently being looked at and the
- // possible insertion point, and make sure any other instructions
- // that reference CPEs will be able to use the same island area;
- // if not, we back up the insertion point.
-
- // The 4 in the following is for the unconditional branch we'll be
- // inserting (allows for long branch on Thumb1). Alignment of the
- // island is handled inside OffsetIsInRange.
- unsigned BaseInsertOffset = UserOffset + U.MaxDisp -4;
- // This could point off the end of the block if we've already got
- // constant pool entries following this block; only the last one is
- // in the water list. Back past any possible branches (allow for a
- // conditional and a maximally long unconditional).
- if (BaseInsertOffset >= BBOffsets[UserMBB->getNumber()+1])
- BaseInsertOffset = BBOffsets[UserMBB->getNumber()+1] -
- (isThumb1 ? 6 : 8);
- unsigned EndInsertOffset = BaseInsertOffset +
- CPEMI->getOperand(2).getImm();
- MachineBasicBlock::iterator MI = UserMI;
- ++MI;
- unsigned CPUIndex = CPUserIndex+1;
- unsigned NumCPUsers = CPUsers.size();
- MachineInstr *LastIT = 0;
- for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
- Offset < BaseInsertOffset;
- Offset += TII->GetInstSizeInBytes(MI),
- MI = llvm::next(MI)) {
- if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
- CPUser &U = CPUsers[CPUIndex];
- if (!OffsetIsInRange(Offset, EndInsertOffset,
- U.MaxDisp, U.NegOk, U.IsSoImm)) {
- BaseInsertOffset -= (isThumb1 ? 2 : 4);
- EndInsertOffset -= (isThumb1 ? 2 : 4);
- }
- // This is overly conservative, as we don't account for CPEMIs
- // being reused within the block, but it doesn't matter much.
- EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm();
- CPUIndex++;
- }
+ if (BBHasFallthrough(UserMBB)) {
+ // Size of branch to insert.
+ unsigned Delta = isThumb1 ? 2 : 4;
+ // End of UserBlock after adding a branch.
+ unsigned UserBlockEnd = UserBBI.postOffset() + Delta;
+ // Compute the offset where the CPE will begin.
+ unsigned CPEOffset = WorstCaseAlign(UserBlockEnd, CPELogAlign,
+ UserBBI.postKnownBits());
+
+ if (OffsetIsInRange(UserOffset, CPEOffset, U)) {
+ DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber()
+ << format(", expected CPE offset %#x\n", CPEOffset));
+ NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
+ // Add an unconditional branch from UserMBB to fallthrough block. Record
+ // it for branch lengthening; this new branch will not get out of range,
+ // but if the preceding conditional branch is out of range, the targets
+ // will be exchanged, and the altered branch may be out of range, so the
+ // machinery has to know about it.
+ int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
+ if (!isThumb)
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
+ else
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB)
+ .addImm(ARMCC::AL).addReg(0);
+ unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
+ ImmBranches.push_back(ImmBranch(&UserMBB->back(),
+ MaxDisp, false, UncondBr));
+ BBInfo[UserMBB->getNumber()].Size += Delta;
+ AdjustBBOffsetsAfter(UserMBB);
+ return;
+ }
+ }
- // Remember the last IT instruction.
- if (MI->getOpcode() == ARM::t2IT)
- LastIT = MI;
+ // What a big block. Find a place within the block to split it. This is a
+ // little tricky on Thumb1 since instructions are 2 bytes and constant pool
+ // entries are 4 bytes: if instruction I references island CPE, and
+ // instruction I+1 references CPE', it will not work well to put CPE as far
+ // forward as possible, since then CPE' cannot immediately follow it (that
+ // location is 2 bytes farther away from I+1 than CPE was from I) and we'd
+ // need to create a new island. So, we make a first guess, then walk through
+ // the instructions between the one currently being looked at and the
+ // possible insertion point, and make sure any other instructions that
+ // reference CPEs will be able to use the same island area; if not, we back
+ // up the insertion point.
+
+ // Try to split the block so it's fully aligned. Compute the latest split
+ // point where we can add a 4-byte branch instruction, and then
+ // WorstCaseAlign to LogAlign.
+ unsigned LogAlign = MF->getAlignment();
+ assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry");
+ unsigned KnownBits = UserBBI.internalKnownBits();
+ unsigned UPad = UnknownPadding(LogAlign, KnownBits);
+ unsigned BaseInsertOffset = UserOffset + U.MaxDisp;
+ DEBUG(dbgs() << format("Split in middle of big block before %#x",
+ BaseInsertOffset));
+
+ // Account for alignment and unknown padding.
+ BaseInsertOffset &= ~((1u << LogAlign) - 1);
+ BaseInsertOffset -= UPad;
+
+ // The 4 in the following is for the unconditional branch we'll be inserting
+ // (allows for long branch on Thumb1). Alignment of the island is handled
+ // inside OffsetIsInRange.
+ BaseInsertOffset -= 4;
+
+ DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset)
+ << " la=" << LogAlign
+ << " kb=" << KnownBits
+ << " up=" << UPad << '\n');
+
+ // This could point off the end of the block if we've already got constant
+ // pool entries following this block; only the last one is in the water list.
+ // Back past any possible branches (allow for a conditional and a maximally
+ // long unconditional).
+ if (BaseInsertOffset >= BBInfo[UserMBB->getNumber()+1].Offset)
+ BaseInsertOffset = BBInfo[UserMBB->getNumber()+1].Offset -
+ (isThumb1 ? 6 : 8);
+ unsigned EndInsertOffset =
+ WorstCaseAlign(BaseInsertOffset + 4, LogAlign, KnownBits) +
+ CPEMI->getOperand(2).getImm();
+ MachineBasicBlock::iterator MI = UserMI;
+ ++MI;
+ unsigned CPUIndex = CPUserIndex+1;
+ unsigned NumCPUsers = CPUsers.size();
+ MachineInstr *LastIT = 0;
+ for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
+ Offset < BaseInsertOffset;
+ Offset += TII->GetInstSizeInBytes(MI),
+ MI = llvm::next(MI)) {
+ if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
+ CPUser &U = CPUsers[CPUIndex];
+ if (!OffsetIsInRange(Offset, EndInsertOffset, U)) {
+ // Shift insertion point by one unit of alignment so it is within reach.
+ BaseInsertOffset -= 1u << LogAlign;
+ EndInsertOffset -= 1u << LogAlign;
+ }
+ // This is overly conservative, as we don't account for CPEMIs being
+ // reused within the block, but it doesn't matter much. Also assume CPEs
+ // are added in order with alignment padding. We may eventually be able
+ // to pack the aligned CPEs better.
+ EndInsertOffset = RoundUpToAlignment(EndInsertOffset,
+ 1u << getCPELogAlign(U.CPEMI)) +
+ U.CPEMI->getOperand(2).getImm();
+ CPUIndex++;
}
- DEBUG(errs() << "Split in middle of big block\n");
- --MI;
+ // Remember the last IT instruction.
+ if (MI->getOpcode() == ARM::t2IT)
+ LastIT = MI;
+ }
- // Avoid splitting an IT block.
- if (LastIT) {
- unsigned PredReg = 0;
- ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
- if (CC != ARMCC::AL)
- MI = LastIT;
- }
- NewMBB = SplitBlockBeforeInstr(MI);
+ --MI;
+
+ // Avoid splitting an IT block.
+ if (LastIT) {
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
+ if (CC != ARMCC::AL)
+ MI = LastIT;
}
+ NewMBB = SplitBlockBeforeInstr(MI);
}
/// HandleConstantPoolUser - Analyze the specified user, checking to see if it
/// is out-of-range. If so, pick up the constant pool value and move it some
/// place in-range. Return true if we changed any addresses (thus must run
/// another pass of branch lengthening), false otherwise.
-bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
- unsigned CPUserIndex) {
+bool ARMConstantIslands::HandleConstantPoolUser(unsigned CPUserIndex) {
CPUser &U = CPUsers[CPUserIndex];
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
@@ -1260,11 +1358,11 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
unsigned ID = AFI->createPICLabelUId();
// Look for water where we can place this CPE.
- MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock();
MachineBasicBlock *NewMBB;
water_iterator IP;
if (LookForWater(U, UserOffset, IP)) {
- DEBUG(errs() << "found water in range\n");
+ DEBUG(dbgs() << "Found water in range\n");
MachineBasicBlock *WaterBB = *IP;
// If the original WaterList entry was "new water" on this iteration,
@@ -1279,7 +1377,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
} else {
// No water found.
- DEBUG(errs() << "No water found\n");
+ DEBUG(dbgs() << "No water found\n");
CreateNewWater(CPUserIndex, UserOffset, NewMBB);
// SplitBlockBeforeInstr adds to WaterList, which is important when it is
@@ -1304,7 +1402,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
WaterList.erase(IP);
// Okay, we know we can put an island before NewMBB now, do it!
- MF.insert(NewMBB, NewIsland);
+ MF->insert(NewMBB, NewIsland);
// Update internal data structures to account for the newly inserted MBB.
UpdateForInsertedWaterBlock(NewIsland);
@@ -1320,13 +1418,12 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
++NumCPEs;
- BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()];
- // Compensate for .align 2 in thumb mode.
- if (isThumb && (BBOffsets[NewIsland->getNumber()]%4 != 0 || HasInlineAsm))
- Size += 2;
+ // Mark the basic block as aligned as required by the const-pool entry.
+ NewIsland->setAlignment(getCPELogAlign(U.CPEMI));
+
// Increase the size of the island block to account for the new entry.
- BBSizes[NewIsland->getNumber()] += Size;
- AdjustBBOffsetsAfter(NewIsland, Size);
+ BBInfo[NewIsland->getNumber()].Size += Size;
+ AdjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland)));
// Finally, change the CPI in the instruction operand to be ID.
for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
@@ -1335,8 +1432,8 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
break;
}
- DEBUG(errs() << " Moved CPE to #" << ID << " CPI=" << CPI
- << '\t' << *UserMI);
+ DEBUG(dbgs() << " Moved CPE to #" << ID << " CPI=" << CPI
+ << format(" offset=%#x\n", BBInfo[NewIsland->getNumber()].Offset));
return true;
}
@@ -1347,19 +1444,18 @@ void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) {
MachineBasicBlock *CPEBB = CPEMI->getParent();
unsigned Size = CPEMI->getOperand(2).getImm();
CPEMI->eraseFromParent();
- BBSizes[CPEBB->getNumber()] -= Size;
+ BBInfo[CPEBB->getNumber()].Size -= Size;
// All succeeding offsets have the current size value added in, fix this.
if (CPEBB->empty()) {
- // In thumb1 mode, the size of island may be padded by two to compensate for
- // the alignment requirement. Then it will now be 2 when the block is
- // empty, so fix this.
- // All succeeding offsets have the current size value added in, fix this.
- if (BBSizes[CPEBB->getNumber()] != 0) {
- Size += BBSizes[CPEBB->getNumber()];
- BBSizes[CPEBB->getNumber()] = 0;
- }
- }
- AdjustBBOffsetsAfter(CPEBB, -Size);
+ BBInfo[CPEBB->getNumber()].Size = 0;
+
+ // This block no longer needs to be aligned. <rdar://problem/10534709>.
+ CPEBB->setAlignment(0);
+ } else
+ // Entries are sorted by descending alignment, so realign from the front.
+ CPEBB->setAlignment(getCPELogAlign(CPEBB->begin()));
+
+ AdjustBBOffsetsAfter(CPEBB);
// An island has only one predecessor BB and one successor BB. Check if
// this BB's predecessor jumps directly to this BB's successor. This
// shouldn't happen currently.
@@ -1390,9 +1486,9 @@ bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
unsigned MaxDisp) {
unsigned PCAdj = isThumb ? 4 : 8;
unsigned BrOffset = GetOffsetOf(MI) + PCAdj;
- unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+ unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
- DEBUG(errs() << "Branch of destination BB#" << DestBB->getNumber()
+ DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
<< " from BB#" << MI->getParent()->getNumber()
<< " max delta=" << MaxDisp
<< " from " << GetOffsetOf(MI) << " to " << DestOffset
@@ -1411,7 +1507,7 @@ bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
/// FixUpImmediateBr - Fix up an immediate branch whose destination is too far
/// away to fit in its displacement field.
-bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) {
+bool ARMConstantIslands::FixUpImmediateBr(ImmBranch &Br) {
MachineInstr *MI = Br.MI;
MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
@@ -1420,8 +1516,8 @@ bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) {
return false;
if (!Br.isCond)
- return FixUpUnconditionalBr(MF, Br);
- return FixUpConditionalBr(MF, Br);
+ return FixUpUnconditionalBr(Br);
+ return FixUpConditionalBr(Br);
}
/// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is
@@ -1429,7 +1525,7 @@ bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) {
/// spilled in the epilogue, then we can use BL to implement a far jump.
/// Otherwise, add an intermediate branch instruction to a branch.
bool
-ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) {
+ARMConstantIslands::FixUpUnconditionalBr(ImmBranch &Br) {
MachineInstr *MI = Br.MI;
MachineBasicBlock *MBB = MI->getParent();
if (!isThumb1)
@@ -1438,12 +1534,12 @@ ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) {
// Use BL to implement far jump.
Br.MaxDisp = (1 << 21) * 2;
MI->setDesc(TII->get(ARM::tBfar));
- BBSizes[MBB->getNumber()] += 2;
- AdjustBBOffsetsAfter(MBB, 2);
+ BBInfo[MBB->getNumber()].Size += 2;
+ AdjustBBOffsetsAfter(MBB);
HasFarJump = true;
++NumUBrFixed;
- DEBUG(errs() << " Changed B to long jump " << *MI);
+ DEBUG(dbgs() << " Changed B to long jump " << *MI);
return true;
}
@@ -1452,7 +1548,7 @@ ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) {
/// far away to fit in its displacement field. It is converted to an inverse
/// conditional branch + an unconditional branch to the destination.
bool
-ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
+ARMConstantIslands::FixUpConditionalBr(ImmBranch &Br) {
MachineInstr *MI = Br.MI;
MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
@@ -1487,7 +1583,7 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
// b L1
MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
if (BBIsInRange(MI, NewDest, Br.MaxDisp)) {
- DEBUG(errs() << " Invert Bcc condition and swap its destination with "
+ DEBUG(dbgs() << " Invert Bcc condition and swap its destination with "
<< *BMI);
BMI->getOperand(0).setMBB(DestBB);
MI->getOperand(0).setMBB(NewDest);
@@ -1502,15 +1598,13 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
// No need for the branch to the next block. We're adding an unconditional
// branch to the destination.
int delta = TII->GetInstSizeInBytes(&MBB->back());
- BBSizes[MBB->getNumber()] -= delta;
- MachineBasicBlock* SplitBB = llvm::next(MachineFunction::iterator(MBB));
- AdjustBBOffsetsAfter(SplitBB, -delta);
+ BBInfo[MBB->getNumber()].Size -= delta;
MBB->back().eraseFromParent();
- // BBOffsets[SplitBB] is wrong temporarily, fixed below
+ // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
}
MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
- DEBUG(errs() << " Insert B to BB#" << DestBB->getNumber()
+ DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber()
<< " also invert condition and change dest. to BB#"
<< NextBB->getNumber() << "\n");
@@ -1519,23 +1613,20 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))
.addMBB(NextBB).addImm(CC).addReg(CCReg);
Br.MI = &MBB->back();
- BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+ BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
if (isThumb)
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB)
.addImm(ARMCC::AL).addReg(0);
else
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
- BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+ BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
// Remove the old conditional branch. It may or may not still be in MBB.
- BBSizes[MI->getParent()->getNumber()] -= TII->GetInstSizeInBytes(MI);
+ BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI);
MI->eraseFromParent();
-
- // The net size change is an addition of one unconditional branch.
- int delta = TII->GetInstSizeInBytes(&MBB->back());
- AdjustBBOffsetsAfter(MBB, delta);
+ AdjustBBOffsetsAfter(MBB);
return true;
}
@@ -1561,7 +1652,7 @@ bool ARMConstantIslands::UndoLRSpillRestore() {
return MadeChange;
}
-bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) {
+bool ARMConstantIslands::OptimizeThumb2Instructions() {
bool MadeChange = false;
// Shrink ADR and LDR from constantpool.
@@ -1598,19 +1689,19 @@ bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) {
if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) {
U.MI->setDesc(TII->get(NewOpc));
MachineBasicBlock *MBB = U.MI->getParent();
- BBSizes[MBB->getNumber()] -= 2;
- AdjustBBOffsetsAfter(MBB, -2);
+ BBInfo[MBB->getNumber()].Size -= 2;
+ AdjustBBOffsetsAfter(MBB);
++NumT2CPShrunk;
MadeChange = true;
}
}
- MadeChange |= OptimizeThumb2Branches(MF);
- MadeChange |= OptimizeThumb2JumpTables(MF);
+ MadeChange |= OptimizeThumb2Branches();
+ MadeChange |= OptimizeThumb2JumpTables();
return MadeChange;
}
-bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
+bool ARMConstantIslands::OptimizeThumb2Branches() {
bool MadeChange = false;
for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) {
@@ -1639,8 +1730,8 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
if (BBIsInRange(Br.MI, DestBB, MaxOffs)) {
Br.MI->setDesc(TII->get(NewOpc));
MachineBasicBlock *MBB = Br.MI->getParent();
- BBSizes[MBB->getNumber()] -= 2;
- AdjustBBOffsetsAfter(MBB, -2);
+ BBInfo[MBB->getNumber()].Size -= 2;
+ AdjustBBOffsetsAfter(MBB);
++NumT2BrShrunk;
MadeChange = true;
}
@@ -1663,7 +1754,7 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
// Check if the distance is within 126. Subtract starting offset by 2
// because the cmp will be eliminated.
unsigned BrOffset = GetOffsetOf(Br.MI) + 4 - 2;
- unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+ unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) {
MachineBasicBlock::iterator CmpMI = Br.MI;
if (CmpMI != Br.MI->getParent()->begin()) {
@@ -1681,8 +1772,8 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
CmpMI->eraseFromParent();
Br.MI->eraseFromParent();
Br.MI = NewBR;
- BBSizes[MBB->getNumber()] -= 2;
- AdjustBBOffsetsAfter(MBB, -2);
+ BBInfo[MBB->getNumber()].Size -= 2;
+ AdjustBBOffsetsAfter(MBB);
++NumCBZ;
MadeChange = true;
}
@@ -1696,12 +1787,12 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
/// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
/// jumptables when it's possible.
-bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
+bool ARMConstantIslands::OptimizeThumb2JumpTables() {
bool MadeChange = false;
// FIXME: After the tables are shrunk, can we get rid some of the
// constantpool tables?
- MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
if (MJTI == 0) return false;
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
@@ -1709,7 +1800,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
MachineInstr *MI = T2JumpTables[i];
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
- unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2);
+ unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2);
MachineOperand JTOP = MI->getOperand(JTOpIdx);
unsigned JTI = JTOP.getIndex();
assert(JTI < JT.size());
@@ -1720,7 +1811,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
MachineBasicBlock *MBB = JTBBs[j];
- unsigned DstOffset = BBOffsets[MBB->getNumber()];
+ unsigned DstOffset = BBInfo[MBB->getNumber()].Offset;
// Negative offset is not ok. FIXME: We should change BB layout to make
// sure all the branches are forward.
if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2)
@@ -1808,8 +1899,8 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
MI->eraseFromParent();
int delta = OrigSize - NewSize;
- BBSizes[MBB->getNumber()] -= delta;
- AdjustBBOffsetsAfter(MBB, -delta);
+ BBInfo[MBB->getNumber()].Size -= delta;
+ AdjustBBOffsetsAfter(MBB);
++NumTBs;
MadeChange = true;
@@ -1821,10 +1912,10 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
/// ReorderThumb2JumpTables - Adjust the function's block layout to ensure that
/// jump tables always branch forwards, since that's what tbb and tbh need.
-bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
+bool ARMConstantIslands::ReorderThumb2JumpTables() {
bool MadeChange = false;
- MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
if (MJTI == 0) return false;
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
@@ -1832,7 +1923,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
MachineInstr *MI = T2JumpTables[i];
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
- unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2);
+ unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2);
MachineOperand JTOP = MI->getOperand(JTOpIdx);
unsigned JTI = JTOP.getIndex();
assert(JTI < JT.size());
@@ -1864,8 +1955,6 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
MachineBasicBlock *ARMConstantIslands::
AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
{
- MachineFunction &MF = *BB->getParent();
-
// If the destination block is terminated by an unconditional branch,
// try to move it; otherwise, create a new block following the jump
// table that branches back to the actual target. This is a very simple
@@ -1882,22 +1971,22 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
// If the block ends in an unconditional branch, move it. The prior block
// has to have an analyzable terminator for us to move this one. Be paranoid
// and make sure we're not trying to move the entry block of the function.
- if (!B && Cond.empty() && BB != MF.begin() &&
+ if (!B && Cond.empty() && BB != MF->begin() &&
!TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) {
BB->moveAfter(JTBB);
OldPrior->updateTerminator();
BB->updateTerminator();
// Update numbering to account for the block being moved.
- MF.RenumberBlocks();
+ MF->RenumberBlocks();
++NumJTMoved;
return NULL;
}
// Create a new MBB for the code after the jump BB.
MachineBasicBlock *NewBB =
- MF.CreateMachineBasicBlock(JTBB->getBasicBlock());
+ MF->CreateMachineBasicBlock(JTBB->getBasicBlock());
MachineFunction::iterator MBBI = JTBB; ++MBBI;
- MF.insert(MBBI, NewBB);
+ MF->insert(MBBI, NewBB);
// Add an unconditional branch from NewBB to BB.
// There doesn't seem to be meaningful DebugInfo available; this doesn't
@@ -1907,7 +1996,7 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
.addImm(ARMCC::AL).addReg(0);
// Update internal data structures to account for the newly inserted MBB.
- MF.RenumberBlocks(NewBB);
+ MF->RenumberBlocks(NewBB);
// Update the CFG.
NewBB->addSuccessor(BB);
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index fc464ea..01d772d 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -61,7 +61,7 @@ namespace {
void ExpandVST(MachineBasicBlock::iterator &MBBI);
void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
- unsigned Opc, bool IsExt, unsigned NumRegs);
+ unsigned Opc, bool IsExt);
void ExpandMOV32BitImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
};
@@ -129,12 +129,15 @@ namespace {
}
static const NEONLdStTableEntry NEONLdStTable[] = {
-{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, false, SingleSpc, 2, 4,true},
-{ ARM::VLD1DUPq16Pseudo_UPD, ARM::VLD1DUPq16_UPD, true, true, true, SingleSpc, 2, 4,true},
-{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, false, SingleSpc, 2, 2,true},
-{ ARM::VLD1DUPq32Pseudo_UPD, ARM::VLD1DUPq32_UPD, true, true, true, SingleSpc, 2, 2,true},
-{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, false, SingleSpc, 2, 8,true},
-{ ARM::VLD1DUPq8Pseudo_UPD, ARM::VLD1DUPq8_UPD, true, true, true, SingleSpc, 2, 8,true},
+{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, false, SingleSpc, 2, 4,false},
+{ ARM::VLD1DUPq16PseudoWB_fixed, ARM::VLD1DUPq16wb_fixed, true, true, true, SingleSpc, 2, 4,false},
+{ ARM::VLD1DUPq16PseudoWB_register, ARM::VLD1DUPq16wb_register, true, true, true, SingleSpc, 2, 4,false},
+{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, false, SingleSpc, 2, 2,false},
+{ ARM::VLD1DUPq32PseudoWB_fixed, ARM::VLD1DUPq32wb_fixed, true, true, false, SingleSpc, 2, 2,false},
+{ ARM::VLD1DUPq32PseudoWB_register, ARM::VLD1DUPq32wb_register, true, true, true, SingleSpc, 2, 2,false},
+{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, false, SingleSpc, 2, 8,false},
+{ ARM::VLD1DUPq8PseudoWB_fixed, ARM::VLD1DUPq8wb_fixed, true, true, false, SingleSpc, 2, 8,false},
+{ ARM::VLD1DUPq8PseudoWB_register, ARM::VLD1DUPq8wb_register, true, true, true, SingleSpc, 2, 8,false},
{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true},
{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true},
@@ -177,18 +180,24 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true},
{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, false, SingleSpc, 2, 4 ,false},
-{ ARM::VLD2d16Pseudo_UPD, ARM::VLD2d16_UPD, true, true, true, SingleSpc, 2, 4 ,false},
+{ ARM::VLD2d16PseudoWB_fixed, ARM::VLD2d16wb_fixed, true, true, false, SingleSpc, 2, 4 ,false},
+{ ARM::VLD2d16PseudoWB_register, ARM::VLD2d16wb_register, true, true, true, SingleSpc, 2, 4 ,false},
{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, false, SingleSpc, 2, 2 ,false},
-{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, true, SingleSpc, 2, 2 ,false},
+{ ARM::VLD2d32PseudoWB_fixed, ARM::VLD2d32wb_fixed, true, true, false, SingleSpc, 2, 2 ,false},
+{ ARM::VLD2d32PseudoWB_register, ARM::VLD2d32wb_register, true, true, true, SingleSpc, 2, 2 ,false},
{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, false, SingleSpc, 2, 8 ,false},
-{ ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, true, SingleSpc, 2, 8 ,false},
+{ ARM::VLD2d8PseudoWB_fixed, ARM::VLD2d8wb_fixed, true, true, false, SingleSpc, 2, 8 ,false},
+{ ARM::VLD2d8PseudoWB_register, ARM::VLD2d8wb_register, true, true, true, SingleSpc, 2, 8 ,false},
{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, false, SingleSpc, 4, 4 ,false},
-{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, true, SingleSpc, 4, 4 ,false},
+{ ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4 ,false},
+{ ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4 ,false},
{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, false, SingleSpc, 4, 2 ,false},
-{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, true, SingleSpc, 4, 2 ,false},
+{ ARM::VLD2q32PseudoWB_fixed, ARM::VLD2q32wb_fixed, true, true, false, SingleSpc, 4, 2 ,false},
+{ ARM::VLD2q32PseudoWB_register, ARM::VLD2q32wb_register, true, true, true, SingleSpc, 4, 2 ,false},
{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, false, SingleSpc, 4, 8 ,false},
-{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, true, SingleSpc, 4, 8 ,false},
+{ ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q8wb_fixed, true, true, false, SingleSpc, 4, 8 ,false},
+{ ARM::VLD2q8PseudoWB_register, ARM::VLD2q8wb_register, true, true, true, SingleSpc, 4, 8 ,false},
{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, false, SingleSpc, 3, 4,true},
{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, true, SingleSpc, 3, 4,true},
@@ -267,10 +276,12 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, false, EvenDblSpc, 1, 8 ,true},
{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, true, EvenDblSpc, 1, 8 ,true},
-{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,true},
-{ ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, true, SingleSpc, 4, 1 ,true},
-{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,true},
-{ ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, true, SingleSpc, 3, 1 ,true},
+{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,false},
+{ ARM::VST1d64QPseudoWB_fixed, ARM::VST1d64Qwb_fixed, false, true, false, SingleSpc, 4, 1 ,false},
+{ ARM::VST1d64QPseudoWB_register, ARM::VST1d64Qwb_register, false, true, true, SingleSpc, 4, 1 ,false},
+{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,false},
+{ ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1 ,false},
+{ ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1 ,false},
{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, false, SingleSpc, 2, 4 ,false},
{ ARM::VST1q16PseudoWB_fixed, ARM::VST1q16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false},
@@ -296,19 +307,25 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2,true},
{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true},
-{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, false, SingleSpc, 2, 4 ,true},
-{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, true, SingleSpc, 2, 4 ,true},
-{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, false, SingleSpc, 2, 2 ,true},
-{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, true, SingleSpc, 2, 2 ,true},
-{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, false, SingleSpc, 2, 8 ,true},
-{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, true, SingleSpc, 2, 8 ,true},
-
-{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,true},
-{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, true, SingleSpc, 4, 4 ,true},
-{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,true},
-{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, true, SingleSpc, 4, 2 ,true},
-{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,true},
-{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, true, SingleSpc, 4, 8 ,true},
+{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, false, SingleSpc, 2, 4 ,false},
+{ ARM::VST2d16PseudoWB_fixed, ARM::VST2d16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false},
+{ ARM::VST2d16PseudoWB_register, ARM::VST2d16wb_register, false, true, true, SingleSpc, 2, 4 ,false},
+{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, false, SingleSpc, 2, 2 ,false},
+{ ARM::VST2d32PseudoWB_fixed, ARM::VST2d32wb_fixed, false, true, true, SingleSpc, 2, 2 ,false},
+{ ARM::VST2d32PseudoWB_register, ARM::VST2d32wb_register, false, true, true, SingleSpc, 2, 2 ,false},
+{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, false, SingleSpc, 2, 8 ,false},
+{ ARM::VST2d8PseudoWB_fixed, ARM::VST2d8wb_fixed, false, true, false, SingleSpc, 2, 8 ,false},
+{ ARM::VST2d8PseudoWB_register, ARM::VST2d8wb_register, false, true, true, SingleSpc, 2, 8 ,false},
+
+{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q32PseudoWB_fixed, ARM::VST2q32wb_fixed, false, true, false, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q32PseudoWB_register, ARM::VST2q32wb_register, false, true, true, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,false},
+{ ARM::VST2q8PseudoWB_fixed, ARM::VST2q8wb_fixed, false, true, false, SingleSpc, 4, 8 ,false},
+{ ARM::VST2q8PseudoWB_register, ARM::VST2q8wb_register, false, true, true, SingleSpc, 4, 8 ,false},
{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4 ,true},
{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4 ,true},
@@ -620,7 +637,7 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
/// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ
/// register operands to real instructions with D register operands.
void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
- unsigned Opc, bool IsExt, unsigned NumRegs) {
+ unsigned Opc, bool IsExt) {
MachineInstr &MI = *MBBI;
MachineBasicBlock &MBB = *MI.getParent();
@@ -636,11 +653,7 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
unsigned D0, D1, D2, D3;
GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3);
- MIB.addReg(D0).addReg(D1);
- if (NumRegs > 2)
- MIB.addReg(D2);
- if (NumRegs > 3)
- MIB.addReg(D3);
+ MIB.addReg(D0);
// Copy the other source register operand.
MIB.addOperand(MI.getOperand(OpIdx++));
@@ -1090,12 +1103,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VLD2q8Pseudo:
case ARM::VLD2q16Pseudo:
case ARM::VLD2q32Pseudo:
- case ARM::VLD2d8Pseudo_UPD:
- case ARM::VLD2d16Pseudo_UPD:
- case ARM::VLD2d32Pseudo_UPD:
- case ARM::VLD2q8Pseudo_UPD:
- case ARM::VLD2q16Pseudo_UPD:
- case ARM::VLD2q32Pseudo_UPD:
+ case ARM::VLD2d8PseudoWB_fixed:
+ case ARM::VLD2d16PseudoWB_fixed:
+ case ARM::VLD2d32PseudoWB_fixed:
+ case ARM::VLD2q8PseudoWB_fixed:
+ case ARM::VLD2q16PseudoWB_fixed:
+ case ARM::VLD2q32PseudoWB_fixed:
+ case ARM::VLD2d8PseudoWB_register:
+ case ARM::VLD2d16PseudoWB_register:
+ case ARM::VLD2d32PseudoWB_register:
+ case ARM::VLD2q8PseudoWB_register:
+ case ARM::VLD2q16PseudoWB_register:
+ case ARM::VLD2q32PseudoWB_register:
case ARM::VLD3d8Pseudo:
case ARM::VLD3d16Pseudo:
case ARM::VLD3d32Pseudo:
@@ -1131,9 +1150,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VLD1DUPq8Pseudo:
case ARM::VLD1DUPq16Pseudo:
case ARM::VLD1DUPq32Pseudo:
- case ARM::VLD1DUPq8Pseudo_UPD:
- case ARM::VLD1DUPq16Pseudo_UPD:
- case ARM::VLD1DUPq32Pseudo_UPD:
+ case ARM::VLD1DUPq8PseudoWB_fixed:
+ case ARM::VLD1DUPq16PseudoWB_fixed:
+ case ARM::VLD1DUPq32PseudoWB_fixed:
+ case ARM::VLD1DUPq8PseudoWB_register:
+ case ARM::VLD1DUPq16PseudoWB_register:
+ case ARM::VLD1DUPq32PseudoWB_register:
case ARM::VLD2DUPd8Pseudo:
case ARM::VLD2DUPd16Pseudo:
case ARM::VLD2DUPd32Pseudo:
@@ -1173,12 +1195,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VST2q8Pseudo:
case ARM::VST2q16Pseudo:
case ARM::VST2q32Pseudo:
- case ARM::VST2d8Pseudo_UPD:
- case ARM::VST2d16Pseudo_UPD:
- case ARM::VST2d32Pseudo_UPD:
- case ARM::VST2q8Pseudo_UPD:
- case ARM::VST2q16Pseudo_UPD:
- case ARM::VST2q32Pseudo_UPD:
+ case ARM::VST2d8PseudoWB_fixed:
+ case ARM::VST2d16PseudoWB_fixed:
+ case ARM::VST2d32PseudoWB_fixed:
+ case ARM::VST2q8PseudoWB_fixed:
+ case ARM::VST2q16PseudoWB_fixed:
+ case ARM::VST2q32PseudoWB_fixed:
+ case ARM::VST2d8PseudoWB_register:
+ case ARM::VST2d16PseudoWB_register:
+ case ARM::VST2d32PseudoWB_register:
+ case ARM::VST2q8PseudoWB_register:
+ case ARM::VST2q16PseudoWB_register:
+ case ARM::VST2q32PseudoWB_register:
case ARM::VST3d8Pseudo:
case ARM::VST3d16Pseudo:
case ARM::VST3d32Pseudo:
@@ -1186,7 +1214,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VST3d8Pseudo_UPD:
case ARM::VST3d16Pseudo_UPD:
case ARM::VST3d32Pseudo_UPD:
- case ARM::VST1d64TPseudo_UPD:
+ case ARM::VST1d64TPseudoWB_fixed:
+ case ARM::VST1d64TPseudoWB_register:
case ARM::VST3q8Pseudo_UPD:
case ARM::VST3q16Pseudo_UPD:
case ARM::VST3q32Pseudo_UPD:
@@ -1203,7 +1232,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VST4d8Pseudo_UPD:
case ARM::VST4d16Pseudo_UPD:
case ARM::VST4d32Pseudo_UPD:
- case ARM::VST1d64QPseudo_UPD:
+ case ARM::VST1d64QPseudoWB_fixed:
+ case ARM::VST1d64QPseudoWB_register:
case ARM::VST4q8Pseudo_UPD:
case ARM::VST4q16Pseudo_UPD:
case ARM::VST4q32Pseudo_UPD:
@@ -1291,12 +1321,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
ExpandLaneOp(MBBI);
return true;
- case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); return true;
- case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); return true;
- case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); return true;
- case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); return true;
- case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); return true;
- case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); return true;
+ case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false); return true;
+ case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true;
+ case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true;
+ case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true); return true;
+ case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
+ case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true;
}
return false;
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 9bae422..a98dfc3 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -178,10 +178,12 @@ class ARMFastISel : public FastISel {
bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool isZExt);
- bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, bool isZExt,
- bool allocReg);
+ bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
+ unsigned Alignment = 0, bool isZExt = true,
+ bool allocReg = true);
- bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr);
+ bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
+ unsigned Alignment = 0);
bool ARMComputeAddress(const Value *Obj, Address &Addr);
void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3);
bool ARMIsMemCpySmall(uint64_t Len);
@@ -227,8 +229,7 @@ class ARMFastISel : public FastISel {
// we don't care about implicit defs here, just places we'll need to add a
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.hasOptionalDef())
+ if (!MI->hasOptionalDef())
return false;
// Look to see if our OptionalDef is defining CPSR or CCR.
@@ -702,7 +703,7 @@ unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
TargetRegisterClass* RC = TLI.getRegClassFor(VT);
unsigned ResultReg = createResultReg(RC);
unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg)
.addFrameIndex(SI->second)
.addImm(0));
@@ -898,7 +899,7 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) {
ARM::GPRRegisterClass;
unsigned ResultReg = createResultReg(RC);
unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg)
.addFrameIndex(Addr.Base.FI)
.addImm(0));
@@ -937,7 +938,8 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
// Now add the rest of the operands.
MIB.addFrameIndex(FI);
- // ARM halfword load/stores and signed byte loads need an additional operand.
+ // ARM halfword load/stores and signed byte loads need an additional
+ // operand.
if (useAM3) {
signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
MIB.addReg(0);
@@ -950,7 +952,8 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
// Now add the rest of the operands.
MIB.addReg(Addr.Base.Reg);
- // ARM halfword load/stores and signed byte loads need an additional operand.
+ // ARM halfword load/stores and signed byte loads need an additional
+ // operand.
if (useAM3) {
signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
MIB.addReg(0);
@@ -963,10 +966,11 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
}
bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
- bool isZExt = true, bool allocReg = true) {
+ unsigned Alignment, bool isZExt, bool allocReg) {
assert(VT.isSimple() && "Non-simple types are invalid here!");
unsigned Opc;
bool useAM3 = false;
+ bool needVMOV = false;
TargetRegisterClass *RC;
switch (VT.getSimpleVT().SimpleTy) {
// This is mostly going to be Neon/vector support.
@@ -1012,10 +1016,25 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
RC = ARM::GPRRegisterClass;
break;
case MVT::f32:
- Opc = ARM::VLDRS;
- RC = TLI.getRegClassFor(VT);
+ if (!Subtarget->hasVFP2()) return false;
+ // Unaligned loads need special handling. Floats require word-alignment.
+ if (Alignment && Alignment < 4) {
+ needVMOV = true;
+ VT = MVT::i32;
+ Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
+ RC = ARM::GPRRegisterClass;
+ } else {
+ Opc = ARM::VLDRS;
+ RC = TLI.getRegClassFor(VT);
+ }
break;
case MVT::f64:
+ if (!Subtarget->hasVFP2()) return false;
+ // FIXME: Unaligned loads need special handling. Doublewords require
+ // word-alignment.
+ if (Alignment && Alignment < 4)
+ return false;
+
Opc = ARM::VLDRD;
RC = TLI.getRegClassFor(VT);
break;
@@ -1030,6 +1049,16 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg);
AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);
+
+ // If we had an unaligned load of a float we've converted it to a regular
+ // load. Now we must move from the GPR to the FP register.
+ if (needVMOV) {
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVSR), MoveReg)
+ .addReg(ResultReg));
+ ResultReg = MoveReg;
+ }
return true;
}
@@ -1048,12 +1077,14 @@ bool ARMFastISel::SelectLoad(const Instruction *I) {
if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
unsigned ResultReg;
- if (!ARMEmitLoad(VT, ResultReg, Addr)) return false;
+ if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
+ return false;
UpdateValueMap(I, ResultReg);
return true;
}
-bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
+bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
+ unsigned Alignment) {
unsigned StrOpc;
bool useAM3 = false;
switch (VT.getSimpleVT().SimpleTy) {
@@ -1101,10 +1132,26 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
break;
case MVT::f32:
if (!Subtarget->hasVFP2()) return false;
- StrOpc = ARM::VSTRS;
+ // Unaligned stores need special handling. Floats require word-alignment.
+ if (Alignment && Alignment < 4) {
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVRS), MoveReg)
+ .addReg(SrcReg));
+ SrcReg = MoveReg;
+ VT = MVT::i32;
+ StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
+ } else {
+ StrOpc = ARM::VSTRS;
+ }
break;
case MVT::f64:
if (!Subtarget->hasVFP2()) return false;
+ // FIXME: Unaligned stores need special handling. Doublewords require
+ // word-alignment.
+ if (Alignment && Alignment < 4)
+ return false;
+
StrOpc = ARM::VSTRD;
break;
}
@@ -1141,7 +1188,8 @@ bool ARMFastISel::SelectStore(const Instruction *I) {
if (!ARMComputeAddress(I->getOperand(1), Addr))
return false;
- if (!ARMEmitStore(VT, SrcReg, Addr)) return false;
+ if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
+ return false;
return true;
}
@@ -1360,7 +1408,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
unsigned SrcReg1 = getRegForValue(Src1Value);
if (SrcReg1 == 0) return false;
- unsigned SrcReg2;
+ unsigned SrcReg2 = 0;
if (!UseImm) {
SrcReg2 = getRegForValue(Src2Value);
if (SrcReg2 == 0) return false;
@@ -1577,7 +1625,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
(ARM_AM::getSOImmVal(Imm) != -1);
}
- unsigned Op2Reg;
+ unsigned Op2Reg = 0;
if (!UseImm) {
Op2Reg = getRegForValue(I->getOperand(2));
if (Op2Reg == 0) return false;
@@ -1716,7 +1764,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
// Use target triple & subtarget features to do actual dispatch.
if (Subtarget->isAAPCS_ABI()) {
if (Subtarget->hasVFP2() &&
- FloatABIType == FloatABI::Hard)
+ TM.Options.FloatABIType == FloatABI::Hard)
return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
else
return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
@@ -1765,21 +1813,23 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
switch (VA.getLocInfo()) {
case CCValAssign::Full: break;
case CCValAssign::SExt: {
- EVT DestVT = VA.getLocVT();
+ MVT DestVT = VA.getLocVT();
unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT,
/*isZExt*/false);
assert (ResultReg != 0 && "Failed to emit a sext");
Arg = ResultReg;
+ ArgVT = DestVT;
break;
}
case CCValAssign::AExt:
// Intentional fall-through. Handle AExt and ZExt.
case CCValAssign::ZExt: {
- EVT DestVT = VA.getLocVT();
+ MVT DestVT = VA.getLocVT();
unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT,
/*isZExt*/true);
assert (ResultReg != 0 && "Failed to emit a sext");
Arg = ResultReg;
+ ArgVT = DestVT;
break;
}
case CCValAssign::BCvt: {
@@ -2456,7 +2506,7 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
unsigned ResultReg = MI->getOperand(0).getReg();
- if (!ARMEmitLoad(VT, ResultReg, Addr, isZExt, false))
+ if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
return false;
MI->eraseFromParent();
return true;
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 2d1de6f..06944b1 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -37,7 +37,8 @@ bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
// Always eliminate non-leaf frame pointers.
- return ((DisableFramePointerElim(MF) && MFI->hasCalls()) ||
+ return ((MF.getTarget().Options.DisableFramePointerElim(MF) &&
+ MFI->hasCalls()) ||
RegInfo->needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken());
@@ -309,8 +310,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- assert(MBBI->getDesc().isReturn() &&
- "Can only insert epilog into returning blocks");
+ assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index 787f6a2..a5fd15b 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -21,7 +21,7 @@ static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
// FIXME: Detect integer instructions properly.
const MCInstrDesc &MCID = MI->getDesc();
unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
- if (MCID.mayStore())
+ if (MI->mayStore())
return false;
unsigned Opcode = MCID.getOpcode();
if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
@@ -38,9 +38,6 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
MachineInstr *MI = SU->getInstr();
if (!MI->isDebugValue()) {
- if (ITBlockSize && MI != ITBlockMIs[ITBlockSize-1])
- return Hazard;
-
// Look for special VMLA / VMLS hazards. A VMUL / VADD / VSUB following
// a VMLA / VMLS will cause 4 cycle stall.
const MCInstrDesc &MCID = MI->getDesc();
@@ -48,9 +45,9 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
MachineInstr *DefMI = LastMI;
const MCInstrDesc &LastMCID = LastMI->getDesc();
// Skip over one non-VFP / NEON instruction.
- if (!LastMCID.isBarrier() &&
+ if (!LastMI->isBarrier() &&
// On A9, AGU and NEON/FPU are muxed.
- !(STI.isCortexA9() && (LastMCID.mayLoad() || LastMCID.mayStore())) &&
+ !(STI.isCortexA9() && (LastMI->mayLoad() || LastMI->mayStore())) &&
(LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
MachineBasicBlock::iterator I = LastMI;
if (I != LastMI->getParent()->begin()) {
@@ -76,30 +73,11 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
void ARMHazardRecognizer::Reset() {
LastMI = 0;
FpMLxStalls = 0;
- ITBlockSize = 0;
ScoreboardHazardRecognizer::Reset();
}
void ARMHazardRecognizer::EmitInstruction(SUnit *SU) {
MachineInstr *MI = SU->getInstr();
- unsigned Opcode = MI->getOpcode();
- if (ITBlockSize) {
- --ITBlockSize;
- } else if (Opcode == ARM::t2IT) {
- unsigned Mask = MI->getOperand(1).getImm();
- unsigned NumTZ = CountTrailingZeros_32(Mask);
- assert(NumTZ <= 3 && "Invalid IT mask!");
- ITBlockSize = 4 - NumTZ;
- MachineBasicBlock::iterator I = MI;
- for (unsigned i = 0; i < ITBlockSize; ++i) {
- // Advance to the next instruction, skipping any dbg_value instructions.
- do {
- ++I;
- } while (I->isDebugValue());
- ITBlockMIs[ITBlockSize-1-i] = &*I;
- }
- }
-
if (!MI->isDebugValue()) {
LastMI = MI;
FpMLxStalls = 0;
diff --git a/lib/Target/ARM/ARMHazardRecognizer.h b/lib/Target/ARM/ARMHazardRecognizer.h
index 2bc218d..98bfc4c 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.h
+++ b/lib/Target/ARM/ARMHazardRecognizer.h
@@ -23,6 +23,10 @@ class ARMBaseRegisterInfo;
class ARMSubtarget;
class MachineInstr;
+/// ARMHazardRecognizer handles special constraints that are not expressed in
+/// the scheduling itinerary. This is only used during postRA scheduling. The
+/// ARM preRA scheduler uses an unspecialized instance of the
+/// ScoreboardHazardRecognizer.
class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
const ARMBaseInstrInfo &TII;
const ARMBaseRegisterInfo &TRI;
@@ -30,8 +34,6 @@ class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
MachineInstr *LastMI;
unsigned FpMLxStalls;
- unsigned ITBlockSize; // No. of MIs in current IT block yet to be scheduled.
- MachineInstr *ITBlockMIs[4];
public:
ARMHazardRecognizer(const InstrItineraryData *ItinData,
@@ -40,7 +42,7 @@ public:
const ARMSubtarget &sti,
const ScheduleDAG *DAG) :
ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), TII(tii),
- TRI(tri), STI(sti), LastMI(0), ITBlockSize(0) {}
+ TRI(tri), STI(sti), LastMI(0) {}
virtual HazardType getHazardType(SUnit *SU, int Stalls);
virtual void Reset();
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index bc8588f..7473141 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1579,6 +1579,22 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
case ARM::VST1q16PseudoWB_fixed: return ARM::VST1q16PseudoWB_register;
case ARM::VST1q32PseudoWB_fixed: return ARM::VST1q32PseudoWB_register;
case ARM::VST1q64PseudoWB_fixed: return ARM::VST1q64PseudoWB_register;
+ case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
+ case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
+
+ case ARM::VLD2d8PseudoWB_fixed: return ARM::VLD2d8PseudoWB_register;
+ case ARM::VLD2d16PseudoWB_fixed: return ARM::VLD2d16PseudoWB_register;
+ case ARM::VLD2d32PseudoWB_fixed: return ARM::VLD2d32PseudoWB_register;
+ case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
+ case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
+ case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
+
+ case ARM::VST2d8PseudoWB_fixed: return ARM::VST2d8PseudoWB_register;
+ case ARM::VST2d16PseudoWB_fixed: return ARM::VST2d16PseudoWB_register;
+ case ARM::VST2d32PseudoWB_fixed: return ARM::VST2d32PseudoWB_register;
+ case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
+ case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
+ case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
}
return Opc; // If not one we handle, return it unchanged.
}
@@ -1646,13 +1662,13 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
- // FIXME: VLD1 fixed increment doesn't need Reg0. Remove the reg0
+ // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
// case entirely when the rest are updated to that form, too.
- if (NumVecs == 1 && !isa<ConstantSDNode>(Inc.getNode()))
+ if ((NumVecs == 1 || NumVecs == 2) && !isa<ConstantSDNode>(Inc.getNode()))
Opc = getVLDSTRegisterUpdateOpcode(Opc);
- // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
+ // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
// check for that explicitly too. Horribly hacky, but temporary.
- if ((NumVecs != 1 && Opc != ARM::VLD1q64PseudoWB_fixed) ||
+ if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64PseudoWB_fixed) ||
!isa<ConstantSDNode>(Inc.getNode()))
Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
}
@@ -1796,9 +1812,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
- // FIXME: VST1 fixed increment doesn't need Reg0. Remove the reg0
+ // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
// case entirely when the rest are updated to that form, too.
- if (NumVecs == 1 && !isa<ConstantSDNode>(Inc.getNode()))
+ if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
Opc = getVLDSTRegisterUpdateOpcode(Opc);
// We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
// check for that explicitly too. Horribly hacky, but temporary.
@@ -2810,10 +2826,13 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VLD2_UPD: {
- unsigned DOpcodes[] = { ARM::VLD2d8Pseudo_UPD, ARM::VLD2d16Pseudo_UPD,
- ARM::VLD2d32Pseudo_UPD, ARM::VLD1q64PseudoWB_fixed};
- unsigned QOpcodes[] = { ARM::VLD2q8Pseudo_UPD, ARM::VLD2q16Pseudo_UPD,
- ARM::VLD2q32Pseudo_UPD };
+ unsigned DOpcodes[] = { ARM::VLD2d8PseudoWB_fixed,
+ ARM::VLD2d16PseudoWB_fixed,
+ ARM::VLD2d32PseudoWB_fixed,
+ ARM::VLD1q64PseudoWB_fixed};
+ unsigned QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
+ ARM::VLD2q16PseudoWB_fixed,
+ ARM::VLD2q32PseudoWB_fixed };
return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0);
}
@@ -2876,16 +2895,19 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VST2_UPD: {
- unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD,
- ARM::VST2d32Pseudo_UPD, ARM::VST1q64PseudoWB_fixed};
- unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD,
- ARM::VST2q32Pseudo_UPD };
+ unsigned DOpcodes[] = { ARM::VST2d8PseudoWB_fixed,
+ ARM::VST2d16PseudoWB_fixed,
+ ARM::VST2d32PseudoWB_fixed,
+ ARM::VST1q64PseudoWB_fixed};
+ unsigned QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
+ ARM::VST2q16PseudoWB_fixed,
+ ARM::VST2q32PseudoWB_fixed };
return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
}
case ARMISD::VST3_UPD: {
unsigned DOpcodes[] = { ARM::VST3d8Pseudo_UPD, ARM::VST3d16Pseudo_UPD,
- ARM::VST3d32Pseudo_UPD, ARM::VST1d64TPseudo_UPD };
+ ARM::VST3d32Pseudo_UPD,ARM::VST1d64TPseudoWB_fixed};
unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
ARM::VST3q16Pseudo_UPD,
ARM::VST3q32Pseudo_UPD };
@@ -2897,7 +2919,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ARMISD::VST4_UPD: {
unsigned DOpcodes[] = { ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD,
- ARM::VST4d32Pseudo_UPD, ARM::VST1d64QPseudo_UPD };
+ ARM::VST4d32Pseudo_UPD,ARM::VST1d64QPseudoWB_fixed};
unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
ARM::VST4q16Pseudo_UPD,
ARM::VST4q32Pseudo_UPD };
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 8c4c06f..c6c1f5b 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -72,7 +72,7 @@ ARMInterworking("arm-interworking", cl::Hidden,
cl::desc("Enable / disable ARM interworking (for debugging only)"),
cl::init(true));
-namespace llvm {
+namespace {
class ARMCCState : public CCState {
public:
ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
@@ -432,7 +432,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
else
addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+ !Subtarget->isThumb1Only()) {
addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
if (!Subtarget->isFPOnlySP())
addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
@@ -467,13 +468,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// v2f64 is legal so that QR subregs can be extracted as f64 elements, but
// neither Neon nor VFP support any arithmetic operations on it.
+ // The same goes for v4f32, but keep in mind that vadd, vsub and vmul are
+ // natively supported for v4f32.
setOperationAction(ISD::FADD, MVT::v2f64, Expand);
setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+ // FIXME: Code duplication: FDIV and FREM are expanded always, see
+ // ARMTargetLowering::addTypeForNEON method for details.
setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
setOperationAction(ISD::FREM, MVT::v2f64, Expand);
+ // FIXME: Create unittest.
+ // In other words, find a case where "copysign" appears in the DAG with vector
+ // operands.
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
+ // FIXME: Code duplication: SETCC has custom operation action, see
+ // ARMTargetLowering::addTypeForNEON method for details.
setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
+ // FIXME: Create unittest for FNEG and for FABS.
setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
setOperationAction(ISD::FABS, MVT::v2f64, Expand);
setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
@@ -486,11 +497,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
+ // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
+
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
// Neon does not support some operations on v1i64 and v2i64 types.
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
@@ -586,6 +609,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ // These just redirect to CTTZ and CTLZ on ARM.
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand);
+
// Only ARMv6 has BSWAP.
if (!Subtarget->hasV6Ops())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
@@ -674,7 +701,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+ !Subtarget->isThumb1Only()) {
// Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
// iff target supports vfp2.
setOperationAction(ISD::BITCAST, MVT::i64, Custom);
@@ -712,7 +740,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+ !Subtarget->isThumb1Only()) {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
}
@@ -723,7 +752,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FMA, MVT::f32, Expand);
// Various VFP goodness
- if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
// int <-> fp are custom expanded into bit_convert + ARMISD ops.
if (Subtarget->hasVFP2()) {
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
@@ -751,7 +780,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setStackPointerRegisterToSaveRestore(ARM::SP);
- if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
+ if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
+ !Subtarget->hasVFP2())
setSchedulingPreference(Sched::RegPressure);
else
setSchedulingPreference(Sched::Hybrid);
@@ -1092,7 +1122,8 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
if (!Subtarget->isAAPCS_ABI())
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
else if (Subtarget->hasVFP2() &&
- FloatABIType == FloatABI::Hard && !isVarArg)
+ getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
+ !isVarArg)
return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
}
@@ -2951,7 +2982,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
- if (UnsafeFPMath &&
+ if (getTargetMachine().Options.UnsafeFPMath &&
(CC == ISD::SETEQ || CC == ISD::SETOEQ ||
CC == ISD::SETNE || CC == ISD::SETUNE)) {
SDValue Result = OptimizeVFPBrcond(Op, DAG);
@@ -3978,9 +4009,8 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
}
// Use vmov.f32 to materialize other v2f32 and v4f32 splats.
- if (VT == MVT::v2f32 || VT == MVT::v4f32) {
- ConstantFPSDNode *C = cast<ConstantFPSDNode>(Op.getOperand(0));
- int ImmVal = ARM_AM::getFP32Imm(C->getValueAPF());
+ if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
+ int ImmVal = ARM_AM::getFP32Imm(SplatBits);
if (ImmVal != -1) {
SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
@@ -6010,7 +6040,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
// executed.
for (MachineBasicBlock::reverse_iterator
II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
- if (!II->getDesc().isCall()) continue;
+ if (!II->isCall()) continue;
DenseMap<unsigned, bool> DefRegs;
for (MachineInstr::mop_iterator
@@ -6421,13 +6451,13 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SDNode *Node) const {
- const MCInstrDesc *MCID = &MI->getDesc();
- if (!MCID->hasPostISelHook()) {
+ if (!MI->hasPostISelHook()) {
assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
"Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'");
return;
}
+ const MCInstrDesc *MCID = &MI->getDesc();
// Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
// RSC. Coming out of isel, they have an implicit CPSR def, but the optional
// operand is still set to noreg. If needed, set the optional operand's
@@ -6454,7 +6484,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// Any ARM instruction that sets the 's' bit should specify an optional
// "cc_out" operand in the last operand position.
- if (!MCID->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
+ if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
assert(!NewOpc && "Optional cc_out operand required");
return;
}
@@ -7948,7 +7978,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
// will return -0, so vmin can only be used for unsafe math or if one of
// the operands is known to be nonzero.
if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
- !UnsafeFPMath &&
+ !DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
@@ -7970,7 +8000,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
// will return +0, so vmax can only be used for unsafe math or if one of
// the operands is known to be nonzero.
if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
- !UnsafeFPMath &&
+ !DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 6940156..80f3773 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -201,21 +201,29 @@ def msr_mask : Operand<i32> {
// 16 imm6<5:4> = '01', 16 - <imm> is encoded in imm6<3:0>
// 32 imm6<5> = '1', 32 - <imm> is encoded in imm6<4:0>
// 64 64 - <imm> is encoded in imm6<5:0>
+def shr_imm8_asm_operand : ImmAsmOperand { let Name = "ShrImm8"; }
def shr_imm8 : Operand<i32> {
let EncoderMethod = "getShiftRight8Imm";
let DecoderMethod = "DecodeShiftRight8Imm";
+ let ParserMatchClass = shr_imm8_asm_operand;
}
+def shr_imm16_asm_operand : ImmAsmOperand { let Name = "ShrImm16"; }
def shr_imm16 : Operand<i32> {
let EncoderMethod = "getShiftRight16Imm";
let DecoderMethod = "DecodeShiftRight16Imm";
+ let ParserMatchClass = shr_imm16_asm_operand;
}
+def shr_imm32_asm_operand : ImmAsmOperand { let Name = "ShrImm32"; }
def shr_imm32 : Operand<i32> {
let EncoderMethod = "getShiftRight32Imm";
let DecoderMethod = "DecodeShiftRight32Imm";
+ let ParserMatchClass = shr_imm32_asm_operand;
}
+def shr_imm64_asm_operand : ImmAsmOperand { let Name = "ShrImm64"; }
def shr_imm64 : Operand<i32> {
let EncoderMethod = "getShiftRight64Imm";
let DecoderMethod = "DecodeShiftRight64Imm";
+ let ParserMatchClass = shr_imm64_asm_operand;
}
//===----------------------------------------------------------------------===//
@@ -231,6 +239,14 @@ class VFP2InstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit>, Requires<[HasVFP2]>;
class VFP3InstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit>, Requires<[HasVFP3]>;
+class NEONInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[HasNEON]>;
+
+
+class VFP2MnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>,
+ Requires<[HasVFP2]>;
+class NEONMnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>,
+ Requires<[HasNEON]>;
//===----------------------------------------------------------------------===//
// ARM Instruction templates.
@@ -1994,73 +2010,111 @@ class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> {
// VFP/NEON Instruction aliases for type suffices.
class VFPDataTypeInstAlias<string opc, string dt, string asm, dag Result> :
- InstAlias<!strconcat(opc, dt, asm), Result>;
-multiclass VFPDT8ReqInstAlias<string opc, string asm, dag Result> {
- def I8 : VFPDataTypeInstAlias<opc, ".i8", asm, Result>;
- def S8 : VFPDataTypeInstAlias<opc, ".s8", asm, Result>;
- def U8 : VFPDataTypeInstAlias<opc, ".u8", asm, Result>;
- def F8 : VFPDataTypeInstAlias<opc, ".p8", asm, Result>;
-}
-// VFPDT8ReqInstAlias plus plain ".8"
-multiclass VFPDT8InstAlias<string opc, string asm, dag Result> {
- def _8 : VFPDataTypeInstAlias<opc, ".8", asm, Result>;
- defm : VFPDT8ReqInstAlias<opc, asm, Result>;
-}
-multiclass VFPDT16ReqInstAlias<string opc, string asm, dag Result> {
- def I16 : VFPDataTypeInstAlias<opc, ".i16", asm, Result>;
- def S16 : VFPDataTypeInstAlias<opc, ".s16", asm, Result>;
- def U16 : VFPDataTypeInstAlias<opc, ".u16", asm, Result>;
- def F16 : VFPDataTypeInstAlias<opc, ".p16", asm, Result>;
-}
-// VFPDT16ReqInstAlias plus plain ".16"
-multiclass VFPDT16InstAlias<string opc, string asm, dag Result> {
- def _16 : VFPDataTypeInstAlias<opc, ".16", asm, Result>;
- defm : VFPDT16ReqInstAlias<opc, asm, Result>;
-}
-multiclass VFPDT32ReqInstAlias<string opc, string asm, dag Result> {
- def I32 : VFPDataTypeInstAlias<opc, ".i32", asm, Result>;
- def S32 : VFPDataTypeInstAlias<opc, ".s32", asm, Result>;
- def U32 : VFPDataTypeInstAlias<opc, ".u32", asm, Result>;
- def F32 : VFPDataTypeInstAlias<opc, ".f32", asm, Result>;
- def F : VFPDataTypeInstAlias<opc, ".f", asm, Result>;
-}
-// VFPDT32ReqInstAlias plus plain ".32"
-multiclass VFPDT32InstAlias<string opc, string asm, dag Result> {
- def _32 : VFPDataTypeInstAlias<opc, ".32", asm, Result>;
- defm : VFPDT32ReqInstAlias<opc, asm, Result>;
-}
-multiclass VFPDT64ReqInstAlias<string opc, string asm, dag Result> {
- def I64 : VFPDataTypeInstAlias<opc, ".i64", asm, Result>;
- def S64 : VFPDataTypeInstAlias<opc, ".s64", asm, Result>;
- def U64 : VFPDataTypeInstAlias<opc, ".u64", asm, Result>;
- def F64 : VFPDataTypeInstAlias<opc, ".f64", asm, Result>;
- def D : VFPDataTypeInstAlias<opc, ".d", asm, Result>;
-}
-// VFPDT64ReqInstAlias plus plain ".64"
-multiclass VFPDT64InstAlias<string opc, string asm, dag Result> {
- def _64 : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
- defm : VFPDT64ReqInstAlias<opc, asm, Result>;
-}
-multiclass VFPDT64NoF64ReqInstAlias<string opc, string asm, dag Result> {
- def I64 : VFPDataTypeInstAlias<opc, ".i64", asm, Result>;
- def S64 : VFPDataTypeInstAlias<opc, ".s64", asm, Result>;
- def U64 : VFPDataTypeInstAlias<opc, ".u64", asm, Result>;
- def D : VFPDataTypeInstAlias<opc, ".d", asm, Result>;
-}
-// VFPDT64ReqInstAlias plus plain ".64"
-multiclass VFPDT64NoF64InstAlias<string opc, string asm, dag Result> {
- def _64 : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
- defm : VFPDT64ReqInstAlias<opc, asm, Result>;
-}
+ InstAlias<!strconcat(opc, dt, "\t", asm), Result>, Requires<[HasVFP2]>;
+
multiclass VFPDTAnyInstAlias<string opc, string asm, dag Result> {
- defm : VFPDT8InstAlias<opc, asm, Result>;
- defm : VFPDT16InstAlias<opc, asm, Result>;
- defm : VFPDT32InstAlias<opc, asm, Result>;
- defm : VFPDT64InstAlias<opc, asm, Result>;
-}
-multiclass VFPDTAnyNoF64InstAlias<string opc, string asm, dag Result> {
- defm : VFPDT8InstAlias<opc, asm, Result>;
- defm : VFPDT16InstAlias<opc, asm, Result>;
- defm : VFPDT32InstAlias<opc, asm, Result>;
- defm : VFPDT64NoF64InstAlias<opc, asm, Result>;
-}
+ def : VFPDataTypeInstAlias<opc, ".8", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".16", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".32", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
+}
+
+// The same alias classes using AsmPseudo instead, for the more complex
+// stuff in NEON that InstAlias can't quite handle.
+// Note that we can't use anonymous defm references here like we can
+// above, as we care about the ultimate instruction enum names generated, unlike
+// for instalias defs.
+class NEONDataTypeAsmPseudoInst<string opc, string dt, string asm, dag iops> :
+ AsmPseudoInst<!strconcat(opc, dt, "\t", asm), iops>, Requires<[HasNEON]>;
+multiclass NEONDT8ReqAsmPseudoInst<string opc, string asm, dag iops> {
+ def I8 : NEONDataTypeAsmPseudoInst<opc, ".i8", asm, iops>;
+ def S8 : NEONDataTypeAsmPseudoInst<opc, ".s8", asm, iops>;
+ def U8 : NEONDataTypeAsmPseudoInst<opc, ".u8", asm, iops>;
+ def P8 : NEONDataTypeAsmPseudoInst<opc, ".p8", asm, iops>;
+}
+// NEONDT8ReqAsmPseudoInst plus plain ".8"
+multiclass NEONDT8AsmPseudoInst<string opc, string asm, dag iops> {
+ def _8 : NEONDataTypeAsmPseudoInst<opc, ".8", asm, iops>;
+ defm _ : NEONDT8ReqAsmPseudoInst<opc, asm, iops>;
+}
+multiclass NEONDT16ReqAsmPseudoInst<string opc, string asm, dag iops> {
+ def I16 : NEONDataTypeAsmPseudoInst<opc, ".i16", asm, iops>;
+ def S16 : NEONDataTypeAsmPseudoInst<opc, ".s16", asm, iops>;
+ def U16 : NEONDataTypeAsmPseudoInst<opc, ".u16", asm, iops>;
+ def P16 : NEONDataTypeAsmPseudoInst<opc, ".p16", asm, iops>;
+}
+// NEONDT16ReqAsmPseudoInst plus plain ".16"
+multiclass NEONDT16AsmPseudoInst<string opc, string asm, dag iops> {
+ def _16 : NEONDataTypeAsmPseudoInst<opc, ".16", asm, iops>;
+ defm _ : NEONDT16ReqAsmPseudoInst<opc, asm, iops>;
+}
+multiclass NEONDT32ReqAsmPseudoInst<string opc, string asm, dag iops> {
+ def I32 : NEONDataTypeAsmPseudoInst<opc, ".i32", asm, iops>;
+ def S32 : NEONDataTypeAsmPseudoInst<opc, ".s32", asm, iops>;
+ def U32 : NEONDataTypeAsmPseudoInst<opc, ".u32", asm, iops>;
+ def F32 : NEONDataTypeAsmPseudoInst<opc, ".f32", asm, iops>;
+ def F : NEONDataTypeAsmPseudoInst<opc, ".f", asm, iops>;
+}
+// NEONDT32ReqAsmPseudoInst plus plain ".32"
+multiclass NEONDT32AsmPseudoInst<string opc, string asm, dag iops> {
+ def _32 : NEONDataTypeAsmPseudoInst<opc, ".32", asm, iops>;
+ defm _ : NEONDT32ReqAsmPseudoInst<opc, asm, iops>;
+}
+multiclass NEONDT64ReqAsmPseudoInst<string opc, string asm, dag iops> {
+ def I64 : NEONDataTypeAsmPseudoInst<opc, ".i64", asm, iops>;
+ def S64 : NEONDataTypeAsmPseudoInst<opc, ".s64", asm, iops>;
+ def U64 : NEONDataTypeAsmPseudoInst<opc, ".u64", asm, iops>;
+ def F64 : NEONDataTypeAsmPseudoInst<opc, ".f64", asm, iops>;
+ def D : NEONDataTypeAsmPseudoInst<opc, ".d", asm, iops>;
+}
+// NEONDT64ReqAsmPseudoInst plus plain ".64"
+multiclass NEONDT64AsmPseudoInst<string opc, string asm, dag iops> {
+ def _64 : NEONDataTypeAsmPseudoInst<opc, ".64", asm, iops>;
+ defm _ : NEONDT64ReqAsmPseudoInst<opc, asm, iops>;
+}
+multiclass NEONDT64NoF64ReqAsmPseudoInst<string opc, string asm, dag iops> {
+ def I64 : NEONDataTypeAsmPseudoInst<opc, ".i64", asm, iops>;
+ def S64 : NEONDataTypeAsmPseudoInst<opc, ".s64", asm, iops>;
+ def U64 : NEONDataTypeAsmPseudoInst<opc, ".u64", asm, iops>;
+ def D : NEONDataTypeAsmPseudoInst<opc, ".d", asm, iops>;
+}
+// NEONDT64ReqAsmPseudoInst plus plain ".64"
+multiclass NEONDT64NoF64AsmPseudoInst<string opc, string asm, dag iops> {
+ def _64 : NEONDataTypeAsmPseudoInst<opc, ".64", asm, iops>;
+ defm _ : NEONDT64ReqAsmPseudoInst<opc, asm, iops>;
+}
+multiclass NEONDTAnyAsmPseudoInst<string opc, string asm, dag iops> {
+ defm _ : NEONDT8AsmPseudoInst<opc, asm, iops>;
+ defm _ : NEONDT16AsmPseudoInst<opc, asm, iops>;
+ defm _ : NEONDT32AsmPseudoInst<opc, asm, iops>;
+ defm _ : NEONDT64AsmPseudoInst<opc, asm, iops>;
+}
+multiclass NEONDTAnyNoF64AsmPseudoInst<string opc, string asm, dag iops> {
+ defm _ : NEONDT8AsmPseudoInst<opc, asm, iops>;
+ defm _ : NEONDT16AsmPseudoInst<opc, asm, iops>;
+ defm _ : NEONDT32AsmPseudoInst<opc, asm, iops>;
+ defm _ : NEONDT64NoF64AsmPseudoInst<opc, asm, iops>;
+}
+
+// Data type suffix token aliases. Implements Table A7-3 in the ARM ARM.
+def : TokenAlias<".s8", ".i8">;
+def : TokenAlias<".u8", ".i8">;
+def : TokenAlias<".s16", ".i16">;
+def : TokenAlias<".u16", ".i16">;
+def : TokenAlias<".s32", ".i32">;
+def : TokenAlias<".u32", ".i32">;
+def : TokenAlias<".s64", ".i64">;
+def : TokenAlias<".u64", ".i64">;
+
+def : TokenAlias<".i8", ".8">;
+def : TokenAlias<".i16", ".16">;
+def : TokenAlias<".i32", ".32">;
+def : TokenAlias<".i64", ".64">;
+
+def : TokenAlias<".p8", ".8">;
+def : TokenAlias<".p16", ".16">;
+
+def : TokenAlias<".f32", ".32">;
+def : TokenAlias<".f64", ".64">;
+def : TokenAlias<".f", ".f32">;
+def : TokenAlias<".d", ".f64">;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index be03924..516a080 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -238,27 +238,23 @@ def so_imm_not_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32);
}]>;
-/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15].
-def imm1_15 : ImmLeaf<i32, [{
- return (int32_t)Imm >= 1 && (int32_t)Imm < 16;
-}]>;
-
/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31].
def imm16_31 : ImmLeaf<i32, [{
return (int32_t)Imm >= 16 && (int32_t)Imm < 32;
}]>;
-def so_imm_neg :
- PatLeaf<(imm), [{
+def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; }
+def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
return ARM_AM::getSOImmVal(-(uint32_t)N->getZExtValue()) != -1;
- }], so_imm_neg_XFORM>;
+ }], so_imm_neg_XFORM> {
+ let ParserMatchClass = so_imm_neg_asmoperand;
+}
// Note: this pattern doesn't require an encoder method and such, as it's
// only used on aliases (Pat<> and InstAlias<>). The actual encoding
-// is handled by the destination instructions, which use t2_so_imm.
+// is handled by the destination instructions, which use so_imm.
def so_imm_not_asmoperand : AsmOperandClass { let Name = "ARMSOImmNot"; }
-def so_imm_not :
- Operand<i32>, PatLeaf<(imm), [{
+def so_imm_not : Operand<i32>, PatLeaf<(imm), [{
return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1;
}], so_imm_not_XFORM> {
let ParserMatchClass = so_imm_not_asmoperand;
@@ -512,6 +508,14 @@ def arm_i32imm : PatLeaf<(imm), [{
return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
}]>;
+/// imm0_1 predicate - Immediate in the range [0,1].
+def Imm0_1AsmOperand: ImmAsmOperand { let Name = "Imm0_1"; }
+def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; }
+
+/// imm0_3 predicate - Immediate in the range [0,3].
+def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; }
+def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; }
+
/// imm0_7 predicate - Immediate in the range [0,7].
def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; }
def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
@@ -520,6 +524,42 @@ def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm0_7AsmOperand;
}
+/// imm8 predicate - Immediate is exactly 8.
+def Imm8AsmOperand: ImmAsmOperand { let Name = "Imm8"; }
+def imm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 8; }]> {
+ let ParserMatchClass = Imm8AsmOperand;
+}
+
+/// imm16 predicate - Immediate is exactly 16.
+def Imm16AsmOperand: ImmAsmOperand { let Name = "Imm16"; }
+def imm16 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 16; }]> {
+ let ParserMatchClass = Imm16AsmOperand;
+}
+
+/// imm32 predicate - Immediate is exactly 32.
+def Imm32AsmOperand: ImmAsmOperand { let Name = "Imm32"; }
+def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> {
+ let ParserMatchClass = Imm32AsmOperand;
+}
+
+/// imm1_7 predicate - Immediate in the range [1,7].
+def Imm1_7AsmOperand: ImmAsmOperand { let Name = "Imm1_7"; }
+def imm1_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 8; }]> {
+ let ParserMatchClass = Imm1_7AsmOperand;
+}
+
+/// imm1_15 predicate - Immediate in the range [1,15].
+def Imm1_15AsmOperand: ImmAsmOperand { let Name = "Imm1_15"; }
+def imm1_15 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 16; }]> {
+ let ParserMatchClass = Imm1_15AsmOperand;
+}
+
+/// imm1_31 predicate - Immediate in the range [1,31].
+def Imm1_31AsmOperand: ImmAsmOperand { let Name = "Imm1_31"; }
+def imm1_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 32; }]> {
+ let ParserMatchClass = Imm1_31AsmOperand;
+}
+
/// imm0_15 predicate - Immediate in the range [0,15].
def Imm0_15AsmOperand: ImmAsmOperand { let Name = "Imm0_15"; }
def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
@@ -544,6 +584,14 @@ def imm0_32 : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm0_32AsmOperand;
}
+/// imm0_63 predicate - True if the 32-bit immediate is in the range [0,63].
+def Imm0_63AsmOperand: ImmAsmOperand { let Name = "Imm0_63"; }
+def imm0_63 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 64;
+}]> {
+ let ParserMatchClass = Imm0_63AsmOperand;
+}
+
/// imm0_255 predicate - Immediate in the range [0,255].
def Imm0_255AsmOperand : ImmAsmOperand { let Name = "Imm0_255"; }
def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
@@ -812,6 +860,9 @@ def addrmode6dup : Operand<i32>,
let PrintMethod = "printAddrMode6Operand";
let MIOperandInfo = (ops GPR:$addr, i32imm);
let EncoderMethod = "getAddrMode6DupAddressOpValue";
+ // FIXME: This is close, but not quite right. The alignment specifier is
+ // different.
+ let ParserMatchClass = AddrMode6AsmOperand;
}
// addrmodepc := pc + reg
@@ -2753,23 +2804,25 @@ defm STRHT : AI3strT<0b1011, "strht">;
// Load / store multiple Instructions.
//
-multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
+multiclass arm_ldst_mult<string asm, string sfx, bit L_bit, bit P_bit, Format f,
InstrItinClass itin, InstrItinClass itin_upd> {
// IA is the default, so no need for an explicit suffix on the
// mnemonic here. Without it is the canonical spelling.
def IA :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def IA_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2778,16 +2831,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
def DA :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "da${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "da${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b00; // Decrement After
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def DA_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "da${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "da${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b00; // Decrement After
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2796,16 +2851,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
def DB :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "db${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def DB_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "db${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2814,16 +2871,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
def IB :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "ib${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "ib${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b11; // Increment Before
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def IB_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "ib${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "ib${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b11; // Increment Before
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2834,10 +2893,12 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
let neverHasSideEffects = 1 in {
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
-defm LDM : arm_ldst_mult<"ldm", 1, LdStMulFrm, IIC_iLoad_m, IIC_iLoad_mu>;
+defm LDM : arm_ldst_mult<"ldm", "", 1, 0, LdStMulFrm, IIC_iLoad_m,
+ IIC_iLoad_mu>;
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
-defm STM : arm_ldst_mult<"stm", 0, LdStMulFrm, IIC_iStore_m, IIC_iStore_mu>;
+defm STM : arm_ldst_mult<"stm", "", 0, 0, LdStMulFrm, IIC_iStore_m,
+ IIC_iStore_mu>;
} // neverHasSideEffects
@@ -2851,6 +2912,16 @@ def LDMIA_RET : ARMPseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
(LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>,
RegConstraint<"$Rn = $wb">;
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm sysLDM : arm_ldst_mult<"ldm", " ^", 1, 1, LdStMulFrm, IIC_iLoad_m,
+ IIC_iLoad_mu>; // sfx = " ^" (appended to the asm string), L_bit = 1, P_bit = 1
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm sysSTM : arm_ldst_mult<"stm", " ^", 0, 1, LdStMulFrm, IIC_iStore_m,
+ IIC_iStore_mu>; // sfx = " ^" (appended to the asm string), L_bit = 0, P_bit = 1
+
+
+
//===----------------------------------------------------------------------===//
// Move Instructions.
//
@@ -4999,6 +5070,32 @@ def : MnemonicAlias<"usubaddx", "usax">;
// for isel.
def : ARMInstAlias<"mov${s}${p} $Rd, $imm",
(MVNi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"mvn${s}${p} $Rd, $imm",
+ (MOVi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>;
+// Same for AND <--> BIC
+def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm",
+ (ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"bic${s}${p} $Rdn, $imm",
+ (ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm",
+ (BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"and${s}${p} $Rdn, $imm",
+ (BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+
+// Likewise, "add Rd, so_imm_neg" -> sub
+def : ARMInstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (SUBri GPR:$Rd, GPR:$Rn, so_imm_neg:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"add${s}${p} $Rd, $imm",
+ (SUBri GPR:$Rd, GPR:$Rd, so_imm_neg:$imm, pred:$p, cc_out:$s)>;
+// Same for CMP <--> CMN via so_imm_neg
+def : ARMInstAlias<"cmp${p} $Rd, $imm",
+ (CMNzri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>;
+def : ARMInstAlias<"cmn${p} $Rd, $imm",
+ (CMPri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>;
// The shifter forms of the MOV instruction are aliased to the ASR, LSL,
// LSR, ROR, and RRX instructions.
@@ -5056,4 +5153,8 @@ def : ARMInstAlias<"ror${s}${p} $Rn, $Rm",
// 'mul' instruction can be specified with only two operands.
def : ARMInstAlias<"mul${s}${p} $Rn, $Rm",
- (MUL rGPR:$Rn, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+ (MUL rGPR:$Rn, rGPR:$Rm, rGPR:$Rn, pred:$p, cc_out:$s)>;
+
+// "neg" is an alias for "rsb rd, rn, #0"
+def : ARMInstAlias<"neg${s}${p} $Rd, $Rm",
+ (RSBri GPR:$Rd, GPR:$Rm, 0, pred:$p, cc_out:$s)>;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index f2ca963..c40860d 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -74,9 +74,11 @@ def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
let MIOperandInfo = (ops i32imm);
}
+// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
let Name = "VecListOneD";
let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
let ParserMatchClass = VecListOneDAsmOperand;
@@ -85,6 +87,7 @@ def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
def VecListTwoDAsmOperand : AsmOperandClass {
let Name = "VecListTwoD";
let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
}
def VecListTwoD : RegisterOperand<DPR, "printVectorListTwo"> {
let ParserMatchClass = VecListTwoDAsmOperand;
@@ -93,6 +96,7 @@ def VecListTwoD : RegisterOperand<DPR, "printVectorListTwo"> {
def VecListThreeDAsmOperand : AsmOperandClass {
let Name = "VecListThreeD";
let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
let ParserMatchClass = VecListThreeDAsmOperand;
@@ -101,6 +105,7 @@ def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
def VecListFourDAsmOperand : AsmOperandClass {
let Name = "VecListFourD";
let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
let ParserMatchClass = VecListFourDAsmOperand;
@@ -109,11 +114,92 @@ def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
def VecListTwoQAsmOperand : AsmOperandClass {
let Name = "VecListTwoQ";
let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
}
-def VecListTwoQ : RegisterOperand<DPR, "printVectorListTwo"> {
+def VecListTwoQ : RegisterOperand<DPR, "printVectorListTwoSpaced"> {
let ParserMatchClass = VecListTwoQAsmOperand;
}
+// Register list of one D register, with "all lanes" subscripting.
+def VecListOneDAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
+ let ParserMatchClass = VecListOneDAllLanesAsmOperand;
+}
+// Register list of two D registers, with "all lanes" subscripting.
+def VecListTwoDAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListTwoDAllLanes : RegisterOperand<DPR, "printVectorListTwoAllLanes"> {
+ let ParserMatchClass = VecListTwoDAllLanesAsmOperand;
+}
+
+// Register list of one D register, with byte lane subscripting.
+def VecListOneDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListOneDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListOneDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListOneDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListOneDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListOneDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListOneDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// Register list of two D registers, with byte lane subscripting.
+def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+
//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//
@@ -272,12 +358,23 @@ class VLDQWBregisterPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
(ins addrmode6:$addr, rGPR:$offset), itin,
"$addr.addr = $wb">;
+
class VLDQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset), itin,
"$addr.addr = $wb">;
+class VLDQQWBfixedPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr), itin,
+ "$addr.addr = $wb">;
+class VLDQQWBregisterPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, rGPR:$offset), itin,
+ "$addr.addr = $wb">;
+
+
class VLDQQQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
"$src = $dst">;
@@ -462,31 +559,23 @@ defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">;
def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
// VLD2 : Vector Load (multiple 2-element structures)
-class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy>
+class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
+ InstrItinClass itin>
: NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
- (ins addrmode6:$Rn), IIC_VLD2,
- "vld2", Dt, "$Vd, $Rn", "", []> {
- let Rm = 0b1111;
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
-}
-class VLD2Q<bits<4> op7_4, string Dt, RegisterOperand VdTy>
- : NLdSt<0, 0b10, 0b0011, op7_4,
- (outs VdTy:$Vd),
- (ins addrmode6:$Rn), IIC_VLD2x2,
+ (ins addrmode6:$Rn), itin,
"vld2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDInstruction";
}
-def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8", VecListTwoD>;
-def VLD2d16 : VLD2D<0b1000, {0,1,?,?}, "16", VecListTwoD>;
-def VLD2d32 : VLD2D<0b1000, {1,0,?,?}, "32", VecListTwoD>;
+def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2>;
+def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2>;
+def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2>;
-def VLD2q8 : VLD2Q<{0,0,?,?}, "8", VecListFourD>;
-def VLD2q16 : VLD2Q<{0,1,?,?}, "16", VecListFourD>;
-def VLD2q32 : VLD2Q<{1,0,?,?}, "32", VecListFourD>;
+def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>;
+def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>;
+def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>;
def VLD2d8Pseudo : VLDQPseudo<IIC_VLD2>;
def VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>;
@@ -497,47 +586,56 @@ def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
// ...with address register writeback:
-class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy>
- : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2u,
- "vld2", Dt, "$Vd, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
-}
-class VLD2QWB<bits<4> op7_4, string Dt, RegisterOperand VdTy>
- : NLdSt<0, 0b10, 0b0011, op7_4,
- (outs VdTy:$Vd, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2x2u,
- "vld2", Dt, "$Vd, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
+ RegisterOperand VdTy, InstrItinClass itin> { // vld2 variants that also define GPR:$wb
+ def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), // "$Rn!" syntax
+ (ins addrmode6:$Rn), itin, // no offset operand in this variant
+ "vld2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), // "$Rn, $Rm" syntax
+ (ins addrmode6:$Rn, rGPR:$Rm), itin, // Rm is an explicit rGPR operand here
+ "vld2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>;
-def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>;
-def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>;
+defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2u>;
+defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2u>;
+defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2u>;
-def VLD2q8_UPD : VLD2QWB<{0,0,?,?}, "8", VecListFourD>;
-def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16", VecListFourD>;
-def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32", VecListFourD>;
+defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>;
+defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>;
+defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>;
-def VLD2d8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
-def VLD2d16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
-def VLD2d32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
+def VLD2d8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>;
+def VLD2d16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>;
+def VLD2d32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>;
+def VLD2d8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>;
+def VLD2d16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>;
+def VLD2d32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>;
-def VLD2q8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
-def VLD2q16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
-def VLD2q32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
+def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
+def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
+def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
+def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
+def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
+def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
// ...with double-spaced registers
-def VLD2b8 : VLD2D<0b1001, {0,0,?,?}, "8", VecListTwoQ>;
-def VLD2b16 : VLD2D<0b1001, {0,1,?,?}, "16", VecListTwoQ>;
-def VLD2b32 : VLD2D<0b1001, {1,0,?,?}, "32", VecListTwoQ>;
-def VLD2b8_UPD : VLD2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>;
-def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>;
-def VLD2b32_UPD : VLD2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>;
+def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2>;
+def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2>;
+def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2>;
+defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2u>;
+defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2u>;
+defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2u>;
// VLD3 : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -997,9 +1095,11 @@ def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
// VLD1DUP : Vector Load (single element to all lanes)
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd), (ins addrmode6dup:$Rn),
- IIC_VLD1dup, "vld1", Dt, "\\{$Vd[]\\}, $Rn", "",
- [(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
+ (ins addrmode6dup:$Rn),
+ IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
+ [(set VecListOneDAllLanes:$Vd,
+ (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
@@ -1025,9 +1125,9 @@ def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
class VLD1QDUP<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2),
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListTwoDAllLanes:$Vd),
(ins addrmode6dup:$Rn), IIC_VLD1dup,
- "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
+ "vld1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
@@ -1038,32 +1138,63 @@ def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">;
def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">;
// ...with address register writeback:
-class VLD1DUPWB<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, GPR:$wb),
- (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
- "vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLD1DupInstruction";
+multiclass VLD1DUPWB<bits<4> op7_4, string Dt> { // vld1 (all-lanes, one D reg) variants that also define GPR:$wb
+ def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, // "$Rn!" syntax
+ (outs VecListOneDAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn), IIC_VLD1dupu, // no offset operand in this variant
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<1, 0b10, 0b1100, op7_4, // "$Rn, $Rm" syntax
+ (outs VecListOneDAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, // Rm is an explicit rGPR operand here
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-class VLD1QDUPWB<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
- (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
- "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLD1DupInstruction";
+multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { // vld1 (all-lanes, two D regs) variants that also define GPR:$wb
+ def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, // "$Rn!" syntax
+ (outs VecListTwoDAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn), IIC_VLD1dupu, // no offset operand in this variant
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<1, 0b10, 0b1100, op7_4, // "$Rn, $Rm" syntax
+ (outs VecListTwoDAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, // Rm is an explicit rGPR operand here
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-def VLD1DUPd8_UPD : VLD1DUPWB<{0,0,0,0}, "8">;
-def VLD1DUPd16_UPD : VLD1DUPWB<{0,1,0,?}, "16">;
-def VLD1DUPd32_UPD : VLD1DUPWB<{1,0,0,?}, "32">;
+defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8">;
+defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">;
+defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">;
-def VLD1DUPq8_UPD : VLD1QDUPWB<{0,0,1,0}, "8">;
-def VLD1DUPq16_UPD : VLD1QDUPWB<{0,1,1,?}, "16">;
-def VLD1DUPq32_UPD : VLD1QDUPWB<{1,0,1,?}, "32">;
+defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">;
+defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">;
+defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">;
-def VLD1DUPq8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
-def VLD1DUPq16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
-def VLD1DUPq32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+def VLD1DUPq8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>;
+def VLD1DUPq16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>;
+def VLD1DUPq32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>;
+def VLD1DUPq8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
+def VLD1DUPq16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
+def VLD1DUPq32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
// VLD2DUP : Vector Load (single 2-element structure to all lanes)
class VLD2DUP<bits<4> op7_4, string Dt>
@@ -1329,94 +1460,109 @@ def VST1q64PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>;
// ...with 3 registers
class VST1D3<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3),
- IIC_VST1x3, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
+ (ins addrmode6:$Rn, VecListThreeD:$Vd),
+ IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVSTInstruction";
}
-class VST1D3WB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm,
- DPR:$Vd, DPR:$src2, DPR:$src3),
- IIC_VST1x3u, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST1D3WB<bits<4> op7_4, string Dt> { // vst1 (three D regs) variants that also define GPR:$wb
+ def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), // "$Rn!" syntax, no offset operand
+ (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VST1x3u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4}; // NOTE(review): non-writeback VST1D3 sets only Inst{4} - confirm
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), // "$Rn, $Rm" syntax
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
+ IIC_VST1x3u, // store itinerary, same one the VST1d64TPseudoWB_* defs use
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4}; // NOTE(review): non-writeback VST1D3 sets only Inst{4} - confirm
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-def VST1d8T : VST1D3<{0,0,0,?}, "8">;
-def VST1d16T : VST1D3<{0,1,0,?}, "16">;
-def VST1d32T : VST1D3<{1,0,0,?}, "32">;
-def VST1d64T : VST1D3<{1,1,0,?}, "64">;
+def VST1d8T : VST1D3<{0,0,0,?}, "8">;
+def VST1d16T : VST1D3<{0,1,0,?}, "16">;
+def VST1d32T : VST1D3<{1,0,0,?}, "32">;
+def VST1d64T : VST1D3<{1,1,0,?}, "64">;
-def VST1d8T_UPD : VST1D3WB<{0,0,0,?}, "8">;
-def VST1d16T_UPD : VST1D3WB<{0,1,0,?}, "16">;
-def VST1d32T_UPD : VST1D3WB<{1,0,0,?}, "32">;
-def VST1d64T_UPD : VST1D3WB<{1,1,0,?}, "64">;
+defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8">;
+defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">;
+defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">;
+defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">;
-def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
-def VST1d64TPseudo_UPD : VSTQQWBPseudo<IIC_VST1x3u>;
+def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
+def VST1d64TPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x3u>;
+def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;
// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0010, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST1x4, "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "",
+ (ins addrmode6:$Rn, VecListFourD:$Vd),
+ IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
[]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVSTInstruction";
}
-class VST1D4WB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm,
- DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST1x4u,
- "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST1D4WB<bits<4> op7_4, string Dt> { // vst1 (four D regs) variants that also define GPR:$wb
+ def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), // "$Rn!" syntax, no offset operand
+ (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VST1x4u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), // "$Rn, $Rm" syntax
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
+ IIC_VST1x4u, // store itinerary, same one the VST1d64QPseudoWB_* defs use
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-def VST1d8Q : VST1D4<{0,0,?,?}, "8">;
-def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
-def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
-def VST1d64Q : VST1D4<{1,1,?,?}, "64">;
+def VST1d8Q : VST1D4<{0,0,?,?}, "8">;
+def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
+def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
+def VST1d64Q : VST1D4<{1,1,?,?}, "64">;
-def VST1d8Q_UPD : VST1D4WB<{0,0,?,?}, "8">;
-def VST1d16Q_UPD : VST1D4WB<{0,1,?,?}, "16">;
-def VST1d32Q_UPD : VST1D4WB<{1,0,?,?}, "32">;
-def VST1d64Q_UPD : VST1D4WB<{1,1,?,?}, "64">;
+defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8">;
+defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">;
+defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">;
+defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">;
-def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
-def VST1d64QPseudo_UPD : VSTQQWBPseudo<IIC_VST1x4u>;
+def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
+def VST1d64QPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x4u>;
+def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;
// VST2 : Vector Store (multiple 2-element structures)
-class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, op11_8, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2),
- IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
- let Rm = 0b1111;
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
-}
-class VST2Q<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0011, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST2x2, "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
- "", []> {
+class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
+ InstrItinClass itin>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd),
+ itin, "vst2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVSTInstruction";
}
-def VST2d8 : VST2D<0b1000, {0,0,?,?}, "8">;
-def VST2d16 : VST2D<0b1000, {0,1,?,?}, "16">;
-def VST2d32 : VST2D<0b1000, {1,0,?,?}, "32">;
+def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VST2>;
+def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VST2>;
+def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VST2>;
-def VST2q8 : VST2Q<{0,0,?,?}, "8">;
-def VST2q16 : VST2Q<{0,1,?,?}, "16">;
-def VST2q32 : VST2Q<{1,0,?,?}, "32">;
+def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>;
+def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>;
+def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>;
def VST2d8Pseudo : VSTQPseudo<IIC_VST2>;
def VST2d16Pseudo : VSTQPseudo<IIC_VST2>;
@@ -1427,47 +1573,76 @@ def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
// ...with address register writeback:
-class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
- IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+// VST2 with address-register writeback, register list type given by VdTy.
+// Each instantiation produces two records:
+//   <name>_fixed    : "$Vd, $Rn!" syntax, Rm fixed to 0b1101.
+//   <name>_register : "$Vd, $Rn, $Rm" syntax, Rm supplied as an operand.
+// Both tie $Rn.addr to the $wb result and use the IIC_VST2u itinerary, the
+// same one the single-record class this multiclass replaces was using.
+multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
+ RegisterOperand VdTy> {
+ def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VdTy:$Vd), IIC_VST2u,
+ "vst2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VST2u,
+ "vst2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-class VST2QWB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm,
- DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST2x2u,
- "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+// VST2 of a four-D-register list (VecListFourD) with address-register
+// writeback. Each instantiation produces two records:
+//   <name>_fixed    : "$Vd, $Rn!" syntax, Rm fixed to 0b1101.
+//   <name>_register : "$Vd, $Rn, $Rm" syntax, Rm supplied as an operand.
+// Both tie $Rn.addr to the $wb result and use the IIC_VST2x2u itinerary, the
+// same one the single-record class this multiclass replaces was using.
+multiclass VST2QWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VST2x2u,
+ "vst2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
+ IIC_VST2x2u,
+ "vst2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-def VST2d8_UPD : VST2DWB<0b1000, {0,0,?,?}, "8">;
-def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16">;
-def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32">;
+defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>;
+defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>;
+defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>;
-def VST2q8_UPD : VST2QWB<{0,0,?,?}, "8">;
-def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">;
-def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">;
+defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">;
+defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">;
+defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">;
-def VST2d8Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
-def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
-def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+def VST2d8PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>;
+def VST2d16PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>;
+def VST2d32PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>;
+def VST2d8PseudoWB_register : VSTQWBPseudo<IIC_VST2u>;
+def VST2d16PseudoWB_register : VSTQWBPseudo<IIC_VST2u>;
+def VST2d32PseudoWB_register : VSTQWBPseudo<IIC_VST2u>;
-def VST2q8Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
-def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
-def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q8PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q16PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q32PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q8PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q16PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q32PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>;
// ...with double-spaced registers
-def VST2b8 : VST2D<0b1001, {0,0,?,?}, "8">;
-def VST2b16 : VST2D<0b1001, {0,1,?,?}, "16">;
-def VST2b32 : VST2D<0b1001, {1,0,?,?}, "32">;
-def VST2b8_UPD : VST2DWB<0b1001, {0,0,?,?}, "8">;
-def VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16">;
-def VST2b32_UPD : VST2DWB<0b1001, {1,0,?,?}, "32">;
+def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VST2>;
+def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VST2>;
+def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VST2>;
+defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>;
+defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>;
+defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>;
// VST3 : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1741,10 +1916,10 @@ def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
- "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
- "$addr.addr = $wb", []> {
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
+ "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVST2LN";
}
@@ -2573,9 +2748,9 @@ class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
// Long shift by immediate.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, op6, op4,
- (outs QPR:$Vd), (ins DPR:$Vm, i32imm:$SIMM), N2RegVShLFrm,
+ (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
[(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm),
(i32 imm:$SIMM))))]>;
@@ -2805,14 +2980,11 @@ multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
v4i32, v4i32, OpNode, Commutable>;
}
-multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> {
- def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
- v4i16, ShOp>;
- def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, !strconcat(Dt,"32"),
- v2i32, ShOp>;
- def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
- v8i16, v4i16, ShOp>;
- def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, !strconcat(Dt,"32"),
+multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
+ def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
+ def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
+ def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
+ def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
v4i32, v2i32, ShOp>;
}
@@ -3477,15 +3649,15 @@ multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
 bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
 def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
- OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode> {
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> { // ImmTy = imm1_7 for v8i8 sources; presumably a 1..7 shift count - confirm against the operand definition
 let Inst{21-19} = 0b001; // imm6 = 001xxx
 }
 def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
- OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode> {
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> { // ImmTy = imm1_15 for v4i16 sources; presumably a 1..15 shift count - confirm against the operand definition
 let Inst{21-20} = 0b01; // imm6 = 01xxxx
 }
 def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
- OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode> {
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> { // ImmTy = imm1_31 for v2i32 sources; presumably a 1..31 shift count - confirm against the operand definition
 let Inst{21} = 0b1; // imm6 = 1xxxxx
 }
}
@@ -3574,7 +3746,7 @@ def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
v2f32, v2f32, fmul, 1>;
def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
v4f32, v4f32, fmul, 1>;
-defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>;
+defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>;
def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
v2f32, fmul>;
@@ -4285,18 +4457,18 @@ defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;
// VSHLL : Vector Shift Left Long (with maximum shift count)
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
- ValueType OpTy, SDNode OpNode>
+ ValueType OpTy, Operand ImmTy, SDNode OpNode>
: N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
- ResTy, OpTy, OpNode> {
+ ResTy, OpTy, ImmTy, OpNode> {
let Inst{21-16} = op21_16;
let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
- v8i16, v8i8, NEONvshlli>;
+ v8i16, v8i8, imm8, NEONvshlli>;
def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
- v4i32, v4i16, NEONvshlli>;
+ v4i32, v4i16, imm16, NEONvshlli>;
def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
- v2i64, v2i32, NEONvshlli>;
+ v2i64, v2i32, imm32, NEONvshlli>;
// VSHRN : Vector Shift Right and Narrow
defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
@@ -4469,10 +4641,6 @@ def : InstAlias<"vmov${p} $Vd, $Vm",
(VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
def : InstAlias<"vmov${p} $Vd, $Vm",
(VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
-defm : VFPDTAnyNoF64InstAlias<"vmov${p}", "$Vd, $Vm",
- (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
-defm : VFPDTAnyNoF64InstAlias<"vmov${p}", "$Vd, $Vm",
- (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
// VMOV : Vector Move (Immediate)
@@ -4932,34 +5100,34 @@ def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
// VEXT : Vector Extract
-class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
+class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy> // immTy: operand class used for $index; each def passes its own
 : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$Vm, i32imm:$index), NVExtFrm,
+ (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm, // $index is typed by immTy instead of the generic i32imm
 IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
 [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
- (Ty DPR:$Vm), imm:$index)))]> {
+ (Ty DPR:$Vm), imm:$index)))]> { // selection pattern still matches a plain imm
 bits<4> index;
 let Inst{11-8} = index{3-0};
}
-class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
+// Q-register VEXT. immTy is the operand class for $index and is chosen by
+// each def according to its element size; it must be used for the operand
+// (as in VEXTd) so that the per-size range passed by the def takes effect.
+class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
 : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
- (ins QPR:$Vn, QPR:$Vm, i32imm:$index), NVExtFrm,
+ (ins QPR:$Vn, QPR:$Vm, immTy:$index), NVExtFrm,
 IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
 [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
- (Ty QPR:$Vm), imm:$index)))]> {
+ (Ty QPR:$Vm), imm:$index)))]> {
 bits<4> index;
 let Inst{11-8} = index{3-0};
}
-def VEXTd8 : VEXTd<"vext", "8", v8i8> {
+def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
let Inst{11-8} = index{3-0};
}
-def VEXTd16 : VEXTd<"vext", "16", v4i16> {
+def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
let Inst{11-9} = index{2-0};
let Inst{8} = 0b0;
}
-def VEXTd32 : VEXTd<"vext", "32", v2i32> {
+def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
let Inst{11-10} = index{1-0};
let Inst{9-8} = 0b00;
}
@@ -4968,17 +5136,21 @@ def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
(i32 imm:$index))),
(VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
-def VEXTq8 : VEXTq<"vext", "8", v16i8> {
+def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
let Inst{11-8} = index{3-0};
}
-def VEXTq16 : VEXTq<"vext", "16", v8i16> {
+def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
let Inst{11-9} = index{2-0};
let Inst{8} = 0b0;
}
-def VEXTq32 : VEXTq<"vext", "32", v4i32> {
+def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
let Inst{11-10} = index{1-0};
let Inst{9-8} = 0b00;
}
+def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
+ let Inst{11} = index{0};
+ let Inst{10-8} = 0b000;
+}
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
(v4f32 QPR:$Vm),
(i32 imm:$index))),
@@ -5026,17 +5198,17 @@ def VTBL1
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
: N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTB2,
- "vtbl", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "", []>;
+ (ins VecListTwoD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
+ "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL3
: N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), NVTBLFrm, IIC_VTB3,
- "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", "", []>;
+ (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
+ "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL4
: N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm),
+ (ins VecListFourD:$Vn, DPR:$Vm),
NVTBLFrm, IIC_VTB4,
- "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", "", []>;
+ "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1
def VTBL2Pseudo
@@ -5056,18 +5228,18 @@ def VTBX1
let hasExtraSrcRegAllocReq = 1 in {
def VTBX2
: N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
- (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
- "vtbx", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "$orig = $Vd", []>;
+ (ins DPR:$orig, VecListTwoD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
+ "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def VTBX3
: N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
- (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm),
+ (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
NVTBLFrm, IIC_VTBX3,
- "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm",
+ "vtbx", "8", "$Vd, $Vn, $Vm",
"$orig = $Vd", []>;
def VTBX4
- : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), (ins DPR:$orig, DPR:$Vn,
- DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
- "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm",
+ : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
+ "vtbx", "8", "$Vd, $Vn, $Vm",
"$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1
@@ -5207,11 +5379,83 @@ def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
// Assembler aliases
//
-// VAND/VEOR/VORR accept but do not require a type suffix.
+def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
+ (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
+def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
+ (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;
+
+
+// VADD two-operand aliases.
+def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm",
+ (VADDv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm",
+ (VADDv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm",
+ (VADDv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm",
+ (VADDv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm",
+ (VADDv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm",
+ (VADDv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm",
+ (VADDv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm",
+ (VADDv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm",
+ (VADDfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm",
+ (VADDfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VSUB two-operand aliases.
+def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm",
+ (VSUBv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm",
+ (VSUBv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm",
+ (VSUBv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm",
+ (VSUBv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm",
+ (VSUBv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm",
+ (VSUBv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm",
+ (VSUBv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm",
+ (VSUBv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm",
+ (VSUBfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm",
+ (VSUBfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VADDW two-operand aliases.
+def : NEONInstAlias<"vaddw${p}.s8 $Vdn, $Vm",
+ (VADDWsv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.s16 $Vdn, $Vm",
+ (VADDWsv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.s32 $Vdn, $Vm",
+ (VADDWsv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.u8 $Vdn, $Vm",
+ (VADDWuv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.u16 $Vdn, $Vm",
+ (VADDWuv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.u32 $Vdn, $Vm",
+ (VADDWuv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
(VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
(VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
+ (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
+ (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
(VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
@@ -5220,245 +5464,450 @@ defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
(VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
(VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
-
-// VLD1 requires a size suffix, but also accepts type specific variants.
-// Load one D register.
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d8 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d16 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d32 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d64 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>;
-// with writeback, fixed stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d8wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d16wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d32wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d64wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-// with writeback, register stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d8wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d16wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d32wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d64wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-
-// Load two D registers.
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1q8 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1q16 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1q32 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1q64 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>;
-// with writeback, fixed stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1q8wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1q16wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1q32wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1q64wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-// with writeback, register stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1q8wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1q16wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1q32wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1q64wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-
-// Load three D registers.
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d8T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d16T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d32T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d64T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>;
-// with writeback, fixed stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d8Twb_fixed VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d16Twb_fixed VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d32Twb_fixed VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d64Twb_fixed VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-// with writeback, register stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d8Twb_register VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d16Twb_register VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d32Twb_register VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d64Twb_register VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-
-
-// Load four D registers.
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d8Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d16Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d32Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d64Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>;
-// with writeback, fixed stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d8Qwb_fixed VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d16Qwb_fixed VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d32Qwb_fixed VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d64Qwb_fixed VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-// with writeback, register stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d8Qwb_register VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d16Qwb_register VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d32Qwb_register VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d64Qwb_register VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-
-// VST1 requires a size suffix, but also accepts type specific variants.
-// Store one D register.
-defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1d8 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1d16 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1d32 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1d64 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-// with writeback, fixed stride
-defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1d8wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1d16wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1d32wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1d64wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-// with writeback, register stride
-defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1d8wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm,
- VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1d16wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm,
- VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1d32wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm,
- VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1d64wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm,
- VecListOneD:$Vd, pred:$p)>;
-
-// Store two D registers.
-defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1q8 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1q16 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1q32 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1q64 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-// with writeback, fixed stride
-defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1q8wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1q16wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1q32wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1q64wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-// with writeback, register stride
-defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1q8wb_register zero_reg, addrmode6:$Rn,
- rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1q16wb_register zero_reg, addrmode6:$Rn,
- rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1q32wb_register zero_reg, addrmode6:$Rn,
- rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1q64wb_register zero_reg, addrmode6:$Rn,
- rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>;
-
-// FIXME: The three and four register VST1 instructions haven't been moved
-// to the VecList* encoding yet, so we can't do assembly parsing support
-// for them. Uncomment these when that happens.
-// Load three D registers.
-//defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d8T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>;
-//defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d16T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>;
-//defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d32T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>;
-//defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d64T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>;
-
-// Load four D registers.
-//defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d8Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>;
-//defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d16Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>;
-//defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d32Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>;
-//defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d64Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>;
-
-
-// VTRN instructions data type suffix aliases for more-specific types.
-defm : VFPDT8ReqInstAlias <"vtrn${p}", "$Dd, $Dm",
- (VTRNd8 DPR:$Dd, DPR:$Dm, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vtrn${p}", "$Dd, $Dm",
- (VTRNd16 DPR:$Dd, DPR:$Dm, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vtrn${p}", "$Dd, $Dm",
- (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
-
-defm : VFPDT8ReqInstAlias <"vtrn${p}", "$Qd, $Qm",
- (VTRNq8 QPR:$Qd, QPR:$Qm, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vtrn${p}", "$Qd, $Qm",
- (VTRNq16 QPR:$Qd, QPR:$Qm, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vtrn${p}", "$Qd, $Qm",
- (VTRNq32 QPR:$Qd, QPR:$Qm, pred:$p)>;
+// ... two-operand aliases
+def : NEONInstAlias<"vand${p} $Vdn, $Vm",
+ (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vand${p} $Vdn, $Vm",
+ (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vbic${p} $Vdn, $Vm",
+ (VBICd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vbic${p} $Vdn, $Vm",
+ (VBICq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"veor${p} $Vdn, $Vm",
+ (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"veor${p} $Vdn, $Vm",
+ (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vorr${p} $Vdn, $Vm",
+ (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vorr${p} $Vdn, $Vm",
+ (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+defm : VFPDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
+ (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
+ (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
+ (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
+ (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
+ (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
+ (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VMUL two-operand aliases.
+def : NEONInstAlias<"vmul${p}.p8 $Qdn, $Qm",
+ (VMULpq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i8 $Qdn, $Qm",
+ (VMULv16i8 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Qm",
+ (VMULv8i16 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Qm",
+ (VMULv4i32 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.p8 $Ddn, $Dm",
+ (VMULpd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i8 $Ddn, $Dm",
+ (VMULv8i8 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm",
+ (VMULv4i16 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm",
+ (VMULv2i32 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Qm",
+ (VMULfq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm",
+ (VMULfd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm$lane",
+ (VMULslv4i16 DPR:$Ddn, DPR:$Ddn, DPR_8:$Dm,
+ VectorIndex16:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Dm$lane",
+ (VMULslv8i16 QPR:$Qdn, QPR:$Qdn, DPR_8:$Dm,
+ VectorIndex16:$lane, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm$lane",
+ (VMULslv2i32 DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Dm$lane",
+ (VMULslv4i32 QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm$lane",
+ (VMULslfd DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Dm$lane",
+ (VMULslfq QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+
+// VQADD (register) two-operand aliases.
+def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm",
+ (VQADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm",
+ (VQADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm",
+ (VQADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm",
+ (VQADDsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm",
+ (VQADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm",
+ (VQADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm",
+ (VQADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm",
+ (VQADDuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm",
+ (VQADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm",
+ (VQADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm",
+ (VQADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm",
+ (VQADDsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm",
+ (VQADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm",
+ (VQADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm",
+ (VQADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm",
+ (VQADDuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VSHL (immediate) two-operand aliases.
+def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm",
+ (VSHLiv8i8 DPR:$Vdn, DPR:$Vdn, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm",
+ (VSHLiv4i16 DPR:$Vdn, DPR:$Vdn, imm0_15:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm",
+ (VSHLiv2i32 DPR:$Vdn, DPR:$Vdn, imm0_31:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm",
+ (VSHLiv1i64 DPR:$Vdn, DPR:$Vdn, imm0_63:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm",
+ (VSHLiv16i8 QPR:$Vdn, QPR:$Vdn, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm",
+ (VSHLiv8i16 QPR:$Vdn, QPR:$Vdn, imm0_15:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm",
+ (VSHLiv4i32 QPR:$Vdn, QPR:$Vdn, imm0_31:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm",
+ (VSHLiv2i64 QPR:$Vdn, QPR:$Vdn, imm0_63:$imm, pred:$p)>;
+
+// VSHL (register) two-operand aliases.
+def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm",
+ (VSHLsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm",
+ (VSHLsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm",
+ (VSHLsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm",
+ (VSHLsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm",
+ (VSHLuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm",
+ (VSHLuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm",
+ (VSHLuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm",
+ (VSHLuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm",
+ (VSHLsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm",
+ (VSHLsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm",
+ (VSHLsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm",
+ (VSHLsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm",
+ (VSHLuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm",
+ (VSHLuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm",
+ (VSHLuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm",
+ (VSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VSHR (immediate) two-operand aliases.
+def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm",
+ (VSHRsv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm",
+ (VSHRsv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm",
+ (VSHRsv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm",
+ (VSHRsv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm",
+ (VSHRsv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm",
+ (VSHRsv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm",
+ (VSHRsv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm",
+ (VSHRsv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm",
+ (VSHRuv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm",
+ (VSHRuv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm",
+ (VSHRuv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm",
+ (VSHRuv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm",
+ (VSHRuv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm",
+ (VSHRuv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm",
+ (VSHRuv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm",
+ (VSHRuv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+// VLD1 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+defm VLD1LNdAsm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD1LNdAsm : NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD1LNdAsm : NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+defm VLD1LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr!",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD1LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr!",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD1LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr!",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD1LNdWB_register_Asm :
+ NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VLD1LNdWB_register_Asm :
+ NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VLD1LNdWB_register_Asm :
+ NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VST1 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+defm VST1LNdAsm : NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST1LNdAsm : NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST1LNdAsm : NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+defm VST1LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr!",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST1LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr!",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST1LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr!",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST1LNdWB_register_Asm :
+ NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VST1LNdWB_register_Asm :
+ NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VST1LNdWB_register_Asm :
+ NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VLD2 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+defm VLD2LNdAsm : NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD2LNdAsm : NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD2LNdAsm : NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+defm VLD2LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr!",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD2LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr!",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD2LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr!",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD2LNdWB_register_Asm :
+ NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VLD2LNdWB_register_Asm :
+ NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VLD2LNdWB_register_Asm :
+ NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VST2 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+defm VST2LNdAsm : NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST2LNdAsm : NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST2LNdAsm : NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+defm VST2LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr!",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST2LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr!",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST2LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr!",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST2LNdWB_register_Asm :
+ NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VST2LNdWB_register_Asm :
+ NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VST2LNdWB_register_Asm :
+ NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VMOV takes an optional datatype suffix
+defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
+ (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
+ (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
+
+// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
+// D-register versions.
+def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
+ (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
+ (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
+ (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
+ (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
+ (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
+ (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
+ (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+// Q-register versions.
+def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
+ (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
+ (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
+ (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
+ (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
+ (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
+ (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
+ (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+
+// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
+// D-register versions.
+def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
+ (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
+ (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
+ (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
+ (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
+ (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
+ (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
+ (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+// Q-register versions.
+def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
+ (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
+ (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
+ (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
+ (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
+ (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
+ (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
+ (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+
+// Two-operand variants for VEXT
+def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm",
+ (VEXTd8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm",
+ (VEXTd16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_3:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm",
+ (VEXTd32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_1:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm",
+ (VEXTq8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_15:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm",
+ (VEXTq16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm",
+ (VEXTq32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_3:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.64 $Vdn, $Vm, $imm",
+ (VEXTq64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_1:$imm, pred:$p)>;
+
+// Two-operand variants for VQDMULH
+def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm",
+ (VQDMULHv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm",
+ (VQDMULHv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm",
+ (VQDMULHv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm",
+ (VQDMULHv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
+// these should restrict to just the Q register variants, but the register
+// classes are enough to match correctly regardless, so we keep it simple
+// and just use MnemonicAlias.
+def : NEONMnemonicAlias<"vbicq", "vbic">;
+def : NEONMnemonicAlias<"vandq", "vand">;
+def : NEONMnemonicAlias<"veorq", "veor">;
+def : NEONMnemonicAlias<"vorrq", "vorr">;
+
+def : NEONMnemonicAlias<"vmovq", "vmov">;
+def : NEONMnemonicAlias<"vmvnq", "vmvn">;
+// Explicit versions for floating point so that the FPImm variants get
+// handled early. The parser gets confused otherwise.
+def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
+def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;
+
+def : NEONMnemonicAlias<"vaddq", "vadd">;
+def : NEONMnemonicAlias<"vsubq", "vsub">;
+
+def : NEONMnemonicAlias<"vminq", "vmin">;
+def : NEONMnemonicAlias<"vmaxq", "vmax">;
+
+def : NEONMnemonicAlias<"vmulq", "vmul">;
+
+def : NEONMnemonicAlias<"vabsq", "vabs">;
+
+def : NEONMnemonicAlias<"vshlq", "vshl">;
+def : NEONMnemonicAlias<"vshrq", "vshr">;
+
+def : NEONMnemonicAlias<"vcvtq", "vcvt">;
+
+def : NEONMnemonicAlias<"vcleq", "vcle">;
+def : NEONMnemonicAlias<"vceqq", "vceq">;
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index c6cc98d..ac1a229 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -1131,9 +1131,6 @@ def tRSB : // A8.6.141
"rsb", "\t$Rd, $Rn, #0",
[(set tGPR:$Rd, (ineg tGPR:$Rn))]>;
-def : tInstAlias<"neg${s}${p} $Rd, $Rm",
- (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>;
-
// Subtract with carry register
let Uses = [CPSR] in
def tSBC : // A8.6.151
@@ -1435,3 +1432,8 @@ def : InstAlias<"nop", (tMOVr R8, R8, 14, 0)>,Requires<[IsThumb, IsThumb1Only]>;
// nothing).
def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
+
+// "neg" is an alias for "rsb rd, rn, #0"
+def : tInstAlias<"neg${s}${p} $Rd, $Rm",
+ (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>;
+
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 6129fa3..981592c 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -80,18 +80,19 @@ def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{
// only used on aliases (Pat<> and InstAlias<>). The actual encoding
// is handled by the destination instructions, which use t2_so_imm.
def t2_so_imm_not_asmoperand : AsmOperandClass { let Name = "T2SOImmNot"; }
-def t2_so_imm_not : Operand<i32>,
- PatLeaf<(imm), [{
+def t2_so_imm_not : Operand<i32>, PatLeaf<(imm), [{
return ARM_AM::getT2SOImmVal(~((uint32_t)N->getZExtValue())) != -1;
}], t2_so_imm_not_XFORM> {
let ParserMatchClass = t2_so_imm_not_asmoperand;
}
// t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm.
-def t2_so_imm_neg : Operand<i32>,
- PatLeaf<(imm), [{
+def t2_so_imm_neg_asmoperand : AsmOperandClass { let Name = "T2SOImmNeg"; }
+def t2_so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1;
-}], t2_so_imm_neg_XFORM>;
+}], t2_so_imm_neg_XFORM> {
+ let ParserMatchClass = t2_so_imm_neg_asmoperand;
+}
/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095].
def imm0_4095 : Operand<i32>,
@@ -1333,7 +1334,7 @@ def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
let mayStore = 1, neverHasSideEffects = 1 in {
def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb),
- (ins rGPR:$Rt, t2addrmode_imm8:$addr),
+ (ins GPRnopc:$Rt, t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
"str", "\t$Rt, $addr!",
"$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
@@ -1357,13 +1358,13 @@ def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb),
} // mayStore = 1, neverHasSideEffects = 1
def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb),
- (ins rGPR:$Rt, addr_offset_none:$Rn,
+ (ins GPRnopc:$Rt, addr_offset_none:$Rn,
t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iStore_iu,
"str", "\t$Rt, $Rn$offset",
"$Rn = $Rn_wb,@earlyclobber $Rn_wb",
[(set GPRnopc:$Rn_wb,
- (post_store rGPR:$Rt, addr_offset_none:$Rn,
+ (post_store GPRnopc:$Rt, addr_offset_none:$Rn,
t2am_imm8_offset:$offset))]>;
def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb),
@@ -3971,6 +3972,18 @@ def : t2InstAlias<"push${p} $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>;
def : t2InstAlias<"pop${p}.w $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>;
def : t2InstAlias<"pop${p} $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>;
+// STMIA/STMIA_UPD aliases w/o the optional .w suffix
+def : t2InstAlias<"stm${p} $Rn, $regs",
+ (t2STMIA GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"stm${p} $Rn!, $regs",
+ (t2STMIA_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
+// LDMIA/LDMIA_UPD aliases w/o the optional .w suffix
+def : t2InstAlias<"ldm${p} $Rn, $regs",
+ (t2LDMIA GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"ldm${p} $Rn!, $regs",
+ (t2LDMIA_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
// STMDB/STMDB_UPD aliases w/ the optional .w suffix
def : t2InstAlias<"stmdb${p}.w $Rn, $regs",
(t2STMDB GPR:$Rn, pred:$p, reglist:$regs)>;
@@ -4084,8 +4097,50 @@ def : t2InstAlias<"sxth${p} $Rd, $Rm$rot",
// for isel.
def : t2InstAlias<"mov${p} $Rd, $imm",
(t2MVNi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
+def : t2InstAlias<"mvn${p} $Rd, $imm",
+ (t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
+// Same for AND <--> BIC
+def : t2InstAlias<"bic${s}${p} $Rd, $Rn, $imm",
+ (t2ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"bic${s}${p} $Rdn, $imm",
+ (t2ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"and${s}${p} $Rd, $Rn, $imm",
+ (t2BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"and${s}${p} $Rdn, $imm",
+ (t2BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+// Likewise, "add Rd, t2_so_imm_neg" -> sub
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${s}${p} $Rd, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rd, t2_so_imm_neg:$imm,
+ pred:$p, cc_out:$s)>;
+// Same for CMP <--> CMN via t2_so_imm_neg
+def : t2InstAlias<"cmp${p} $Rd, $imm",
+ (t2CMNzri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
+def : t2InstAlias<"cmn${p} $Rd, $imm",
+ (t2CMPri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
// Wide 'mul' encoding can be specified with only two operands.
def : t2InstAlias<"mul${p} $Rn, $Rm",
- (t2MUL rGPR:$Rn, rGPR:$Rn, rGPR:$Rm, pred:$p)>;
+ (t2MUL rGPR:$Rn, rGPR:$Rm, rGPR:$Rn, pred:$p)>;
+
+// "neg" is an alias for "rsb rd, rn, #0"
+def : t2InstAlias<"neg${s}${p} $Rd, $Rm",
+ (t2RSBri rGPR:$Rd, rGPR:$Rm, 0, pred:$p, cc_out:$s)>;
+
+// MOV so_reg assembler pseudos. InstAlias isn't expressive enough for
+// these, unfortunately.
+def t2MOVsi: t2AsmPseudo<"mov${p} $Rd, $shift",
+ (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>;
+def t2MOVSsi: t2AsmPseudo<"movs${p} $Rd, $shift",
+ (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>;
+
+// ADR w/o the .w suffix
+def : t2InstAlias<"adr${p} $Rd, $addr",
+ (t2ADR rGPR:$Rd, t2adrlabel:$addr, pred:$p)>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index e420135..5d43556 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -1160,18 +1160,64 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm),
//===----------------------------------------------------------------------===//
// Assembler aliases.
//
+// A few mnemonic aliases for pre-unified syntax. We don't guarantee to
+// support them all, but supporting at least some of the basics is
+// good to be friendly.
+def : VFP2MnemonicAlias<"flds", "vldr">;
+def : VFP2MnemonicAlias<"fldd", "vldr">;
+def : VFP2MnemonicAlias<"fmrs", "vmov">;
+def : VFP2MnemonicAlias<"fmsr", "vmov">;
+def : VFP2MnemonicAlias<"fsqrts", "vsqrt">;
+def : VFP2MnemonicAlias<"fsqrtd", "vsqrt">;
+def : VFP2MnemonicAlias<"fadds", "vadd.f32">;
+def : VFP2MnemonicAlias<"faddd", "vadd.f64">;
+def : VFP2MnemonicAlias<"fmrdd", "vmov">;
+def : VFP2MnemonicAlias<"fmrds", "vmov">;
+def : VFP2MnemonicAlias<"fmrrd", "vmov">;
+def : VFP2MnemonicAlias<"fmdrr", "vmov">;
+def : VFP2MnemonicAlias<"fmuld", "vmul.f64">;
+def : VFP2MnemonicAlias<"fnegs", "vneg.f32">;
+def : VFP2MnemonicAlias<"fnegd", "vneg.f64">;
+def : VFP2MnemonicAlias<"ftosizd", "vcvt.s32.f64">;
+def : VFP2MnemonicAlias<"ftosid", "vcvtr.s32.f64">;
+def : VFP2MnemonicAlias<"ftosizs", "vcvt.s32.f32">;
+def : VFP2MnemonicAlias<"ftosis", "vcvtr.s32.f32">;
+def : VFP2MnemonicAlias<"ftouizd", "vcvt.u32.f64">;
+def : VFP2MnemonicAlias<"ftouid", "vcvtr.u32.f64">;
+def : VFP2MnemonicAlias<"ftouizs", "vcvt.u32.f32">;
+def : VFP2MnemonicAlias<"ftouis", "vcvtr.u32.f32">;
+def : VFP2MnemonicAlias<"fsitod", "vcvt.f64.s32">;
+def : VFP2MnemonicAlias<"fsitos", "vcvt.f32.s32">;
+def : VFP2MnemonicAlias<"fuitod", "vcvt.f64.u32">;
+def : VFP2MnemonicAlias<"fuitos", "vcvt.f32.u32">;
+def : VFP2MnemonicAlias<"fsts", "vstr">;
+def : VFP2MnemonicAlias<"fstd", "vstr">;
+def : VFP2MnemonicAlias<"fmacd", "vmla.f64">;
+def : VFP2MnemonicAlias<"fmacs", "vmla.f32">;
def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>;
+def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm",
+ (VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
+def : VFP2InstAlias<"faddd${p} $Dd, $Dn, $Dm",
+ (VADDD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2InstAlias<"fsubs${p} $Sd, $Sn, $Sm",
+ (VSUBS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
+def : VFP2InstAlias<"fsubd${p} $Dd, $Dn, $Dm",
+ (VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
+
+// No need for the size suffix on VSQRT. It's implied by the register classes.
+def : VFP2InstAlias<"vsqrt${p} $Sd, $Sm", (VSQRTS SPR:$Sd, SPR:$Sm, pred:$p)>;
+def : VFP2InstAlias<"vsqrt${p} $Dd, $Dm", (VSQRTD DPR:$Dd, DPR:$Dm, pred:$p)>;
// VLDR/VSTR accept an optional type suffix.
-defm : VFPDT32InstAlias<"vldr${p}", "$Sd, $addr",
- (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
-defm : VFPDT32InstAlias<"vstr${p}", "$Sd, $addr",
- (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
-defm : VFPDT64InstAlias<"vldr${p}", "$Dd, $addr",
- (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
-defm : VFPDT64InstAlias<"vstr${p}", "$Dd, $addr",
- (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vldr${p}.32 $Sd, $addr",
+ (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vstr${p}.32 $Sd, $addr",
+ (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vldr${p}.64 $Dd, $addr",
+ (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vstr${p}.64 $Dd, $addr",
+ (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
// VMUL has a two-operand form (implied destination operand)
def : VFP2InstAlias<"vmul${p}.f64 $Dn, $Dm",
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index c8728f4..6712fb6 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -33,6 +33,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -1471,19 +1472,18 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
while (++I != E) {
if (I->isDebugValue() || MemOps.count(&*I))
continue;
- const MCInstrDesc &MCID = I->getDesc();
- if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects())
+ if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
return false;
- if (isLd && MCID.mayStore())
+ if (isLd && I->mayStore())
return false;
if (!isLd) {
- if (MCID.mayLoad())
+ if (I->mayLoad())
return false;
// It's not safe to move the first 'str' down.
// str r1, [r0]
// strh r5, [r0]
// str r4, [r0, #+4]
- if (MCID.mayStore())
+ if (I->mayStore())
return false;
}
for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
@@ -1773,8 +1773,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
while (MBBI != E) {
for (; MBBI != E; ++MBBI) {
MachineInstr *MI = MBBI;
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.isCall() || MCID.isTerminator()) {
+ if (MI->isCall() || MI->isTerminator()) {
// Stop at barriers.
++MBBI;
break;
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 6cbb24b..61b75cb 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -38,22 +38,25 @@ extern "C" void LLVMInitializeARMTarget() {
///
ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS),
JITInfo(),
InstrItins(Subtarget.getInstrItineraryData()) {
// Default to soft float ABI
- if (FloatABIType == FloatABI::Default)
- FloatABIType = FloatABI::Soft;
+ if (Options.FloatABIType == FloatABI::Default)
+ this->Options.FloatABIType = FloatABI::Soft;
}
ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM, OL), InstrInfo(Subtarget),
+ : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ InstrInfo(Subtarget),
DataLayout(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:64-i64:32:64-"
"v128:32:128-v64:32:64-n32-S32") :
@@ -73,9 +76,10 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM, OL),
+ : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
InstrInfo(Subtarget.hasThumb2()
? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
: ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
@@ -143,10 +147,16 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM) {
}
bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM) {
- if (Subtarget.isThumb2() && !Subtarget.prefers32BitThumb())
- PM.add(createThumb2SizeReductionPass());
+ if (Subtarget.isThumb2()) {
+ if (!Subtarget.prefers32BitThumb())
+ PM.add(createThumb2SizeReductionPass());
+
+ // The constant island pass works on unbundled instructions.
+ PM.add(createUnpackMachineBundlesPass());
+ }
PM.add(createARMConstantIslandPass());
+
return true;
}
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index a1f517b..cd77822 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -41,6 +41,7 @@ private:
public:
ARMBaseTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
@@ -71,6 +72,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
public:
ARMTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
@@ -112,6 +114,7 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
public:
ThumbTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 19defa1..721a225 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -36,6 +36,7 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
ELF::SHF_WRITE |
ELF::SHF_ALLOC,
SectionKind::getDataRel());
+ StructorOutputOrder = Structors::PriorityOrder;
LSDASection = NULL;
}
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index bb83e5e..cd86065 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -39,10 +39,15 @@ namespace {
class ARMOperand;
+enum VectorLaneTy { NoLanes, AllLanes, IndexedLane };
+
class ARMAsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
MCAsmParser &Parser;
+ // Map of register aliases registered via the .req directive.
+ StringMap<unsigned> RegisterReqs;
+
struct {
ARMCC::CondCodes Cond; // Condition for IT block.
unsigned Mask:4; // Condition mask for instructions.
@@ -90,9 +95,12 @@ class ARMAsmParser : public MCTargetAsmParser {
unsigned &ShiftAmount);
bool parseDirectiveWord(unsigned Size, SMLoc L);
bool parseDirectiveThumb(SMLoc L);
+ bool parseDirectiveARM(SMLoc L);
bool parseDirectiveThumbFunc(SMLoc L);
bool parseDirectiveCode(SMLoc L);
bool parseDirectiveSyntax(SMLoc L);
+ bool parseDirectiveReq(StringRef Name, SMLoc L);
+ bool parseDirectiveUnreq(SMLoc L);
StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode,
bool &CarrySetting, unsigned &ProcessorIMod,
@@ -161,6 +169,7 @@ class ARMAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseVectorList(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index);
// Asm Match Converter Methods
bool cvtT2LdrdPre(MCInst &Inst, unsigned Opcode,
@@ -271,6 +280,8 @@ class ARMOperand : public MCParsedAsmOperand {
k_DPRRegisterList,
k_SPRRegisterList,
k_VectorList,
+ k_VectorListAllLanes,
+ k_VectorListIndexed,
k_ShiftedRegister,
k_ShiftedImmediate,
k_ShifterImmediate,
@@ -324,6 +335,8 @@ class ARMOperand : public MCParsedAsmOperand {
struct {
unsigned RegNum;
unsigned Count;
+ unsigned LaneIndex;
+ bool isDoubleSpaced;
} VectorList;
struct {
@@ -409,6 +422,8 @@ public:
Registers = o.Registers;
break;
case k_VectorList:
+ case k_VectorListAllLanes:
+ case k_VectorListIndexed:
VectorList = o.VectorList;
break;
case k_CoprocNum:
@@ -562,6 +577,22 @@ public:
int64_t Value = CE->getValue();
return Value >= 0 && Value < 256;
}
+  /// Match a constant immediate operand whose value is 0 or 1.
+  bool isImm0_1() const {
+    if (Kind != k_Immediate)
+      return false;
+    if (const MCConstantExpr *Imm = dyn_cast<MCConstantExpr>(getImm())) {
+      const int64_t V = Imm->getValue();
+      return 0 <= V && V <= 1;
+    }
+    // Non-constant expressions never match.
+    return false;
+  }
+  /// Match a constant immediate operand in the range [0, 3].
+  bool isImm0_3() const {
+    if (Kind != k_Immediate)
+      return false;
+    const MCConstantExpr *Imm = dyn_cast<MCConstantExpr>(getImm());
+    if (Imm == 0)
+      return false;
+    const int64_t V = Imm->getValue();
+    return !(V < 0 || V > 3);
+  }
bool isImm0_7() const {
if (Kind != k_Immediate)
return false;
@@ -586,6 +617,94 @@ public:
int64_t Value = CE->getValue();
return Value >= 0 && Value < 32;
}
+  /// Match a constant immediate operand in the range [0, 63].
+  bool isImm0_63() const {
+    if (Kind != k_Immediate)
+      return false;
+    if (const MCConstantExpr *Imm = dyn_cast<MCConstantExpr>(getImm())) {
+      const int64_t V = Imm->getValue();
+      return 0 <= V && V <= 63;
+    }
+    // Non-constant expressions never match.
+    return false;
+  }
+  /// Match a constant immediate operand whose value is exactly 8.
+  bool isImm8() const {
+    if (Kind != k_Immediate)
+      return false;
+    const MCConstantExpr *Imm = dyn_cast<MCConstantExpr>(getImm());
+    // A non-constant expression yields a null pointer and does not match.
+    return Imm ? (Imm->getValue() == 8) : false;
+  }
+  /// Match a constant immediate operand whose value is exactly 16.
+  bool isImm16() const {
+    if (Kind != k_Immediate)
+      return false;
+    const MCConstantExpr *Imm = dyn_cast<MCConstantExpr>(getImm());
+    // A non-constant expression yields a null pointer and does not match.
+    return Imm ? (Imm->getValue() == 16) : false;
+  }
+  /// Match a constant immediate operand whose value is exactly 32.
+  bool isImm32() const {
+    if (Kind != k_Immediate)
+      return false;
+    const MCConstantExpr *Imm = dyn_cast<MCConstantExpr>(getImm());
+    // A non-constant expression yields a null pointer and does not match.
+    return Imm ? (Imm->getValue() == 32) : false;
+  }
+  /// Match a constant immediate operand in the range [1, 8].
+  bool isShrImm8() const {
+    if (Kind != k_Immediate)
+      return false;
+    if (const MCConstantExpr *Imm = dyn_cast<MCConstantExpr>(getImm())) {
+      const int64_t Amount = Imm->getValue();
+      return 1 <= Amount && Amount <= 8;
+    }
+    return false;
+  }
+  /// Match a constant immediate operand in the range [1, 16].
+  bool isShrImm16() const {
+    if (Kind != k_Immediate)
+      return false;
+    if (const MCConstantExpr *Imm = dyn_cast<MCConstantExpr>(getImm())) {
+      const int64_t Amount = Imm->getValue();
+      return 1 <= Amount && Amount <= 16;
+    }
+    return false;
+  }
+  /// Match a constant immediate operand in the range [1, 32].
+  bool isShrImm32() const {
+    if (Kind != k_Immediate)
+      return false;
+    if (const MCConstantExpr *Imm = dyn_cast<MCConstantExpr>(getImm())) {
+      const int64_t Amount = Imm->getValue();
+      return 1 <= Amount && Amount <= 32;
+    }
+    return false;
+  }
+  /// Match a constant immediate operand in the range [1, 64].
+  bool isShrImm64() const {
+    if (Kind != k_Immediate)
+      return false;
+    if (const MCConstantExpr *Imm = dyn_cast<MCConstantExpr>(getImm())) {
+      const int64_t Amount = Imm->getValue();
+      return 1 <= Amount && Amount <= 64;
+    }
+    return false;
+  }
+  /// Match a constant immediate operand in the range [1, 7].
+  bool isImm1_7() const {
+    if (Kind != k_Immediate)
+      return false;
+    const MCConstantExpr *Imm = dyn_cast<MCConstantExpr>(getImm());
+    if (Imm == 0)
+      return false;
+    const int64_t V = Imm->getValue();
+    return !(V <= 0 || V >= 8);
+  }
+  /// Match a constant immediate operand in the range [1, 15].
+  bool isImm1_15() const {
+    if (Kind != k_Immediate)
+      return false;
+    const MCConstantExpr *Imm = dyn_cast<MCConstantExpr>(getImm());
+    if (Imm == 0)
+      return false;
+    const int64_t V = Imm->getValue();
+    return !(V <= 0 || V >= 16);
+  }
+  /// Match a constant immediate operand in the range [1, 31].
+  bool isImm1_31() const {
+    if (Kind != k_Immediate)
+      return false;
+    const MCConstantExpr *Imm = dyn_cast<MCConstantExpr>(getImm());
+    if (Imm == 0)
+      return false;
+    const int64_t V = Imm->getValue();
+    return !(V <= 0 || V >= 32);
+  }
bool isImm1_16() const {
if (Kind != k_Immediate)
return false;
@@ -676,6 +795,14 @@ public:
int64_t Value = CE->getValue();
return ARM_AM::getSOImmVal(~Value) != -1;
}
+  /// Match a constant immediate whose arithmetic negation is accepted by
+  /// ARM_AM::getSOImmVal (i.e. that call does not return -1).
+  bool isARMSOImmNeg() const {
+    if (Kind != k_Immediate)
+      return false;
+    if (const MCConstantExpr *Imm = dyn_cast<MCConstantExpr>(getImm())) {
+      int64_t Value = Imm->getValue();
+      return ARM_AM::getSOImmVal(-Value) != -1;
+    }
+    // Non-constant expressions never match.
+    return false;
+  }
bool isT2SOImm() const {
if (Kind != k_Immediate)
return false;
@@ -692,6 +819,14 @@ public:
int64_t Value = CE->getValue();
return ARM_AM::getT2SOImmVal(~Value) != -1;
}
+  /// Match a constant immediate whose arithmetic negation is accepted by
+  /// ARM_AM::getT2SOImmVal (i.e. that call does not return -1).
+  bool isT2SOImmNeg() const {
+    if (Kind != k_Immediate)
+      return false;
+    if (const MCConstantExpr *Imm = dyn_cast<MCConstantExpr>(getImm())) {
+      int64_t Value = Imm->getValue();
+      return ARM_AM::getT2SOImmVal(-Value) != -1;
+    }
+    // Non-constant expressions never match.
+    return false;
+  }
bool isSetEndImm() const {
if (Kind != k_Immediate)
return false;
@@ -892,9 +1027,9 @@ public:
if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Immediate offset in range [-255, -1].
- if (!Memory.OffsetImm) return true;
+ if (!Memory.OffsetImm) return false;
int64_t Val = Memory.OffsetImm->getValue();
- return Val > -256 && Val < 0;
+ return (Val == INT32_MIN) || (Val > -256 && Val < 0);
}
bool isMemUImm12Offset() const {
if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
@@ -940,31 +1075,75 @@ public:
bool isProcIFlags() const { return Kind == k_ProcIFlags; }
// NEON operands.
+  /// True for a k_VectorList operand that is not flagged as double-spaced.
+  bool isSingleSpacedVectorList() const {
+    // Only consult the VectorList payload once the kind is known to match.
+    if (Kind != k_VectorList)
+      return false;
+    return !VectorList.isDoubleSpaced;
+  }
+  /// True for a k_VectorList operand that is flagged as double-spaced.
+  bool isDoubleSpacedVectorList() const {
+    // Only consult the VectorList payload once the kind is known to match.
+    if (Kind != k_VectorList)
+      return false;
+    return VectorList.isDoubleSpaced;
+  }
bool isVecListOneD() const {
- if (Kind != k_VectorList) return false;
+ if (!isSingleSpacedVectorList()) return false;
return VectorList.Count == 1;
}
bool isVecListTwoD() const {
- if (Kind != k_VectorList) return false;
+ if (!isSingleSpacedVectorList()) return false;
return VectorList.Count == 2;
}
bool isVecListThreeD() const {
- if (Kind != k_VectorList) return false;
+ if (!isSingleSpacedVectorList()) return false;
return VectorList.Count == 3;
}
bool isVecListFourD() const {
- if (Kind != k_VectorList) return false;
+ if (!isSingleSpacedVectorList()) return false;
return VectorList.Count == 4;
}
bool isVecListTwoQ() const {
- if (Kind != k_VectorList) return false;
- //FIXME: We haven't taught the parser to handle by-two register lists
- // yet, so don't pretend to know one.
- return VectorList.Count == 2 && false;
+ if (!isDoubleSpacedVectorList()) return false;
+ return VectorList.Count == 2;
+ }
+
+  /// True for an all-lanes vector list holding exactly one register.
+  bool isVecListOneDAllLanes() const {
+    return Kind == k_VectorListAllLanes && VectorList.Count == 1;
+  }
+
+  /// True for an all-lanes vector list holding exactly two registers.
+  bool isVecListTwoDAllLanes() const {
+    return Kind == k_VectorListAllLanes && VectorList.Count == 2;
+  }
+
+  /// True for a lane-indexed vector list of one register whose lane index
+  /// is at most 7.
+  bool isVecListOneDByteIndexed() const {
+    return Kind == k_VectorListIndexed && VectorList.Count == 1 &&
+           VectorList.LaneIndex < 8;
+  }
+
+  /// True for a lane-indexed vector list of one register whose lane index
+  /// is at most 3.
+  bool isVecListOneDHWordIndexed() const {
+    return Kind == k_VectorListIndexed && VectorList.Count == 1 &&
+           VectorList.LaneIndex < 4;
+  }
+
+  /// True for a lane-indexed vector list of one register whose lane index
+  /// is at most 1.
+  bool isVecListOneDWordIndexed() const {
+    return Kind == k_VectorListIndexed && VectorList.Count == 1 &&
+           VectorList.LaneIndex < 2;
+  }
+
+  /// True for a lane-indexed vector list of two registers whose lane index
+  /// is at most 7.
+  bool isVecListTwoDByteIndexed() const {
+    return Kind == k_VectorListIndexed && VectorList.Count == 2 &&
+           VectorList.LaneIndex < 8;
+  }
+
+  /// True for a lane-indexed vector list of two registers whose lane index
+  /// is at most 3.
+  bool isVecListTwoDHWordIndexed() const {
+    return Kind == k_VectorListIndexed && VectorList.Count == 2 &&
+           VectorList.LaneIndex < 4;
+  }
+
+ bool isVecListTwoDWordIndexed() const {
+ if (Kind != k_VectorListIndexed) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 1;
}
bool isVectorIndex8() const {
@@ -1233,6 +1412,14 @@ public:
Inst.addOperand(MCOperand::CreateImm(~CE->getValue()));
}
+  /// Append this operand to \p Inst as one immediate holding the arithmetic
+  /// negation of the parsed constant.
+  ///
+  /// The operand is actually a t2_so_imm, but the assembly source carries
+  /// its negation, so the sign is flipped here.
+  /// \param Inst  instruction receiving the operand.
+  /// \param N     number of MCInst operands to add; must be 1.
+  void addT2SOImmNegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    // The result is dereferenced unconditionally, so use cast<> rather than
+    // dyn_cast<>: a non-constant expression trips an assertion instead of
+    // dereferencing a null pointer.
+    const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+    Inst.addOperand(MCOperand::CreateImm(-CE->getValue()));
+  }
+
void addARMSOImmNotOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// The operand is actually a so_imm, but we have its bitwise
@@ -1241,6 +1428,14 @@ public:
Inst.addOperand(MCOperand::CreateImm(~CE->getValue()));
}
+  /// Append this operand to \p Inst as one immediate holding the arithmetic
+  /// negation of the parsed constant.
+  ///
+  /// The operand is actually a so_imm, but the assembly source carries its
+  /// negation, so the sign is flipped here.
+  /// \param Inst  instruction receiving the operand.
+  /// \param N     number of MCInst operands to add; must be 1.
+  void addARMSOImmNegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    // The result is dereferenced unconditionally, so use cast<> rather than
+    // dyn_cast<>: a non-constant expression trips an assertion instead of
+    // dereferencing a null pointer.
+    const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+    Inst.addOperand(MCOperand::CreateImm(-CE->getValue()));
+  }
+
void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt())));
@@ -1527,37 +1722,15 @@ public:
Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags())));
}
- void addVecListOneDOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
- }
-
- void addVecListTwoDOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- // Only the first register actually goes on the instruction. The rest
- // are implied by the opcode.
- Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
- }
-
- void addVecListThreeDOperands(MCInst &Inst, unsigned N) const {
+ void addVecListOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- // Only the first register actually goes on the instruction. The rest
- // are implied by the opcode.
Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
}
- void addVecListFourDOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- // Only the first register actually goes on the instruction. The rest
- // are implied by the opcode.
- Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
- }
-
- void addVecListTwoQOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- // Only the first register actually goes on the instruction. The rest
- // are implied by the opcode.
+ void addVecListIndexedOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
+ Inst.addOperand(MCOperand::CreateImm(VectorList.LaneIndex));
}
void addVectorIndex8Operands(MCInst &Inst, unsigned N) const {
@@ -1780,10 +1953,32 @@ public:
}
static ARMOperand *CreateVectorList(unsigned RegNum, unsigned Count,
- SMLoc S, SMLoc E) {
+ bool isDoubleSpaced, SMLoc S, SMLoc E) {
ARMOperand *Op = new ARMOperand(k_VectorList);
Op->VectorList.RegNum = RegNum;
Op->VectorList.Count = Count;
+ Op->VectorList.isDoubleSpaced = isDoubleSpaced;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+  /// Allocate a k_VectorListAllLanes operand for \p Count registers starting
+  /// at \p RegNum, covering source locations \p S through \p E.
+  /// Only the RegNum and Count members of VectorList are written here.
+  static ARMOperand *CreateVectorListAllLanes(unsigned RegNum, unsigned Count,
+                                              SMLoc S, SMLoc E) {
+    ARMOperand *List = new ARMOperand(k_VectorListAllLanes);
+    List->StartLoc = S;
+    List->EndLoc = E;
+    List->VectorList.Count = Count;
+    List->VectorList.RegNum = RegNum;
+    return List;
+  }
+
+ static ARMOperand *CreateVectorListIndexed(unsigned RegNum, unsigned Count,
+ unsigned Index, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_VectorListIndexed);
+ Op->VectorList.RegNum = RegNum;
+ Op->VectorList.Count = Count;
+ Op->VectorList.LaneIndex = Index;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
@@ -1982,6 +2177,14 @@ void ARMOperand::print(raw_ostream &OS) const {
OS << "<vector_list " << VectorList.Count << " * "
<< VectorList.RegNum << ">";
break;
+ case k_VectorListAllLanes:
+ OS << "<vector_list(all lanes) " << VectorList.Count << " * "
+ << VectorList.RegNum << ">";
+ break;
+ case k_VectorListIndexed:
+ OS << "<vector_list(lane " << VectorList.LaneIndex << ") "
+ << VectorList.Count << " * " << VectorList.RegNum << ">";
+ break;
case k_Token:
OS << "'" << getToken() << "'";
break;
@@ -2000,7 +2203,9 @@ static unsigned MatchRegisterName(StringRef Name);
bool ARMAsmParser::ParseRegister(unsigned &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) {
+ StartLoc = Parser.getTok().getLoc();
RegNo = tryParseRegister();
+ EndLoc = Parser.getTok().getLoc();
return (RegNo == (unsigned)-1);
}
@@ -2013,8 +2218,6 @@ int ARMAsmParser::tryParseRegister() {
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier)) return -1;
- // FIXME: Validate register for the current architecture; we have to do
- // validation later, so maybe there is no need for this here.
std::string lowerCase = Tok.getString().lower();
unsigned RegNum = MatchRegisterName(lowerCase);
if (!RegNum) {
@@ -2023,9 +2226,34 @@ int ARMAsmParser::tryParseRegister() {
.Case("r14", ARM::LR)
.Case("r15", ARM::PC)
.Case("ip", ARM::R12)
+ // Additional register name aliases for 'gas' compatibility.
+ .Case("a1", ARM::R0)
+ .Case("a2", ARM::R1)
+ .Case("a3", ARM::R2)
+ .Case("a4", ARM::R3)
+ .Case("v1", ARM::R4)
+ .Case("v2", ARM::R5)
+ .Case("v3", ARM::R6)
+ .Case("v4", ARM::R7)
+ .Case("v5", ARM::R8)
+ .Case("v6", ARM::R9)
+ .Case("v7", ARM::R10)
+ .Case("v8", ARM::R11)
+ .Case("sb", ARM::R9)
+ .Case("sl", ARM::R10)
+ .Case("fp", ARM::R11)
.Default(0);
}
- if (!RegNum) return -1;
+ if (!RegNum) {
+ // Check for aliases registered via .req.
+ StringMap<unsigned>::const_iterator Entry =
+ RegisterReqs.find(Tok.getIdentifier());
+ // If no match, return failure.
+ if (Entry == RegisterReqs.end())
+ return -1;
+ Parser.Lex(); // Eat identifier token.
+ return Entry->getValue();
+ }
Parser.Lex(); // Eat identifier token.
@@ -2045,6 +2273,7 @@ int ARMAsmParser::tryParseShiftRegister(
std::string lowerCase = Tok.getString().lower();
ARM_AM::ShiftOpc ShiftTy = StringSwitch<ARM_AM::ShiftOpc>(lowerCase)
+ .Case("asl", ARM_AM::lsl)
.Case("lsl", ARM_AM::lsl)
.Case("lsr", ARM_AM::lsr)
.Case("asr", ARM_AM::asr)
@@ -2073,7 +2302,8 @@ int ARMAsmParser::tryParseShiftRegister(
ShiftReg = SrcReg;
} else {
// Figure out if this is shifted by a constant or a register (for non-RRX).
- if (Parser.getTok().is(AsmToken::Hash)) {
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar)) {
Parser.Lex(); // Eat hash.
SMLoc ImmLoc = Parser.getTok().getLoc();
const MCExpr *ShiftExpr = 0;
@@ -2446,6 +2676,7 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the comma.
RegLoc = Parser.getTok().getLoc();
int OldReg = Reg;
+ const AsmToken RegTok = Parser.getTok();
Reg = tryParseRegister();
if (Reg == -1)
return Error(RegLoc, "register expected");
@@ -2459,8 +2690,13 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (!RC->contains(Reg))
return Error(RegLoc, "invalid register in register list");
// List must be monotonically increasing.
- if (getARMRegisterNumbering(Reg) <= getARMRegisterNumbering(OldReg))
+ if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg))
return Error(RegLoc, "register list not in ascending order");
+ if (getARMRegisterNumbering(Reg) == getARMRegisterNumbering(OldReg)) {
+ Warning(RegLoc, "duplicated register (" + RegTok.getString() +
+ ") in register list");
+ continue;
+ }
// VFP register lists must also be contiguous.
// It's OK to use the enumeration values directly here rather, as the
// VFP register classes have the enum sorted properly.
@@ -2477,13 +2713,55 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return Error(E, "'}' expected");
Parser.Lex(); // Eat '}' token.
+ // Push the register list operand.
Operands.push_back(ARMOperand::CreateRegList(Registers, S, E));
+
+ // The ARM system instruction variants for LDM/STM have a '^' token here.
+ if (Parser.getTok().is(AsmToken::Caret)) {
+ Operands.push_back(ARMOperand::CreateToken("^",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat '^' token.
+ }
+
return false;
}
+/// Helper function to parse the optional lane specifier that may follow a
+/// register in a vector list.
+///
+/// Accepted forms at the current token:
+///   (no '[')   -> LaneKind = NoLanes, nothing consumed
+///   "[]"       -> LaneKind = AllLanes
+///   "[<int>]"  -> LaneKind = IndexedLane, Index = <int> (0..7)
+///
+/// \param LaneKind  written on success with the kind of specifier found.
+/// \param Index     always written: the lane number for IndexedLane,
+///                  otherwise 0.
+/// \returns MatchOperand_Success when a well-formed specifier (or none) was
+///          seen; MatchOperand_ParseFail, after emitting a diagnostic, when
+///          the index is not an integer, is out of range, or the closing
+///          ']' is missing.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) {
+  Index = 0; // Always return a defined index value.
+  if (Parser.getTok().isNot(AsmToken::LBrac)) {
+    LaneKind = NoLanes;
+    return MatchOperand_Success;
+  }
+  Parser.Lex(); // Eat the '['.
+  if (Parser.getTok().is(AsmToken::RBrac)) {
+    // "Dn[]" is the 'all lanes' syntax.
+    LaneKind = AllLanes;
+    Parser.Lex(); // Eat the ']'.
+    return MatchOperand_Success;
+  }
+  if (Parser.getTok().isNot(AsmToken::Integer)) {
+    Error(Parser.getTok().getLoc(), "lane index must be empty or an integer");
+    return MatchOperand_ParseFail;
+  }
+  int64_t Val = Parser.getTok().getIntVal();
+  // FIXME: Make this range check context sensitive for .8, .16, .32.
+  // The bounds are OR'ed: no value is both negative and greater than 7, so
+  // an AND here would never reject anything.
+  if (Val < 0 || Val > 7) {
+    Error(Parser.getTok().getLoc(), "lane index out of range");
+    return MatchOperand_ParseFail;
+  }
+  Index = Val;
+  LaneKind = IndexedLane;
+  Parser.Lex(); // Eat the lane index.
+  if (Parser.getTok().isNot(AsmToken::RBrac)) {
+    // Fail rather than consuming an unrelated token as if it were ']'.
+    Error(Parser.getTok().getLoc(), "']' expected");
+    return MatchOperand_ParseFail;
+  }
+  Parser.Lex(); // Eat the ']'.
+  return MatchOperand_Success;
+}
+
// parse a vector register list
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ VectorLaneTy LaneKind;
+ unsigned LaneIndex;
SMLoc S = Parser.getTok().getLoc();
// As an extension (to match gas), support a plain D register or Q register
// (without encosing curly braces) as a single or double entry list,
@@ -2494,12 +2772,48 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_NoMatch;
SMLoc E = Parser.getTok().getLoc();
if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) {
- Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, S, E));
+ OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex);
+ if (Res != MatchOperand_Success)
+ return Res;
+ switch (LaneKind) {
+ default:
+ assert(0 && "unexpected lane kind!");
+ case NoLanes:
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, false, S, E));
+ break;
+ case AllLanes:
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 1, S, E));
+ break;
+ case IndexedLane:
+ Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 1,
+ LaneIndex, S,E));
+ break;
+ }
return MatchOperand_Success;
}
if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
Reg = getDRegFromQReg(Reg);
- Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, S, E));
+ OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex);
+ if (Res != MatchOperand_Success)
+ return Res;
+ switch (LaneKind) {
+ default:
+ assert(0 && "unexpected lane kind!");
+ case NoLanes:
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E));
+ break;
+ case AllLanes:
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, S, E));
+ break;
+ case IndexedLane:
+ Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 2,
+ LaneIndex, S,E));
+ break;
+ }
return MatchOperand_Success;
}
Error(S, "vector register expected");
@@ -2518,18 +2832,30 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_ParseFail;
}
unsigned Count = 1;
+ int Spacing = 0;
unsigned FirstReg = Reg;
// The list is of D registers, but we also allow Q regs and just interpret
// them as the two D sub-registers.
if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
FirstReg = Reg = getDRegFromQReg(Reg);
+ Spacing = 1; // double-spacing requires explicit D registers, otherwise
+ // it's ambiguous with four-register single spaced.
++Reg;
++Count;
}
+ if (parseVectorLane(LaneKind, LaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
while (Parser.getTok().is(AsmToken::Comma) ||
Parser.getTok().is(AsmToken::Minus)) {
if (Parser.getTok().is(AsmToken::Minus)) {
+ if (!Spacing)
+ Spacing = 1; // Register range implies a single spaced list.
+ else if (Spacing == 2) {
+ Error(Parser.getTok().getLoc(),
+ "sequential registers in double spaced list");
+ return MatchOperand_ParseFail;
+ }
Parser.Lex(); // Eat the minus.
SMLoc EndLoc = Parser.getTok().getLoc();
int EndReg = tryParseRegister();
@@ -2554,6 +2880,16 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Error(EndLoc, "bad range in register list");
return MatchOperand_ParseFail;
}
+ // Parse the lane specifier if present.
+ VectorLaneTy NextLaneKind;
+ unsigned NextLaneIndex;
+ if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
+ Error(EndLoc, "mismatched lane index in register list");
+ return MatchOperand_ParseFail;
+ }
+ EndLoc = Parser.getTok().getLoc();
// Add all the registers in the range to the register list.
Count += EndReg - Reg;
@@ -2575,6 +2911,13 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// The list is of D registers, but we also allow Q regs and just interpret
// them as the two D sub-registers.
if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ if (!Spacing)
+ Spacing = 1; // Register range implies a single spaced list.
+ else if (Spacing == 2) {
+ Error(RegLoc,
+ "invalid register in double-spaced list (must be 'D' register')");
+ return MatchOperand_ParseFail;
+ }
Reg = getDRegFromQReg(Reg);
if (Reg != OldReg + 1) {
Error(RegLoc, "non-contiguous register range");
@@ -2582,14 +2925,45 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
++Reg;
Count += 2;
+ // Parse the lane specifier if present.
+ VectorLaneTy NextLaneKind;
+ unsigned NextLaneIndex;
+ SMLoc EndLoc = Parser.getTok().getLoc();
+ if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
+ Error(EndLoc, "mismatched lane index in register list");
+ return MatchOperand_ParseFail;
+ }
continue;
}
- // Normal D register. Just check that it's contiguous and keep going.
- if (Reg != OldReg + 1) {
+ // Normal D register.
+ // Figure out the register spacing (single or double) of the list if
+ // we don't know it already.
+ if (!Spacing)
+ Spacing = 1 + (Reg == OldReg + 2);
+
+ // Just check that it's contiguous and keep going.
+ if (Reg != OldReg + Spacing) {
Error(RegLoc, "non-contiguous register range");
return MatchOperand_ParseFail;
}
++Count;
+ // Parse the lane specifier if present.
+ VectorLaneTy NextLaneKind;
+ unsigned NextLaneIndex;
+ SMLoc EndLoc = Parser.getTok().getLoc();
+ if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
+ Error(EndLoc, "mismatched lane index in register list");
+ return MatchOperand_ParseFail;
+ }
+ if (Spacing == 2 && LaneKind != NoLanes) {
+ Error(EndLoc,
+ "lane index specfier invalid in double spaced register list");
+ return MatchOperand_ParseFail;
+ }
}
SMLoc E = Parser.getTok().getLoc();
@@ -2599,7 +2973,22 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
Parser.Lex(); // Eat '}' token.
- Operands.push_back(ARMOperand::CreateVectorList(FirstReg, Count, S, E));
+ switch (LaneKind) {
+ default:
+ assert(0 && "unexpected lane kind in register list.");
+ case NoLanes:
+ Operands.push_back(ARMOperand::CreateVectorList(FirstReg, Count,
+ (Spacing == 2), S, E));
+ break;
+ case AllLanes:
+ Operands.push_back(ARMOperand::CreateVectorListAllLanes(FirstReg, Count,
+ S, E));
+ break;
+ case IndexedLane:
+ Operands.push_back(ARMOperand::CreateVectorListIndexed(FirstReg, Count,
+ LaneIndex, S, E));
+ break;
+ }
return MatchOperand_Success;
}
@@ -2786,7 +3175,8 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
Parser.Lex(); // Eat shift type token.
// There must be a '#' and a shift amount.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2864,7 +3254,8 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the operator.
// A '#' and a shift amount.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2924,7 +3315,8 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the operator.
// A '#' and a rotate amount.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2961,7 +3353,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
// The bitfield descriptor is really two operands, the LSB and the width.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2993,7 +3386,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_ParseFail;
}
Parser.Lex(); // Eat hash token.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -3087,7 +3481,8 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Tok.getLoc();
// Do immediates first, as we always parse those if we have a '#'.
- if (Parser.getTok().is(AsmToken::Hash)) {
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar)) {
Parser.Lex(); // Eat the '#'.
// Explicitly look for a '-', as we need to encode negative zero
// differently.
@@ -3444,7 +3839,7 @@ bool ARMAsmParser::
cvtVLDwbFixed(MCInst &Inst, unsigned Opcode,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Vd
- ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
// Vn
@@ -3458,7 +3853,7 @@ bool ARMAsmParser::
cvtVLDwbRegister(MCInst &Inst, unsigned Opcode,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Vd
- ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
// Vn
@@ -3478,7 +3873,7 @@ cvtVSTwbFixed(MCInst &Inst, unsigned Opcode,
// Vn
((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
// Vt
- ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
return true;
@@ -3494,7 +3889,7 @@ cvtVSTwbRegister(MCInst &Inst, unsigned Opcode,
// Vm
((ARMOperand*)Operands[5])->addRegOperands(Inst, 1);
// Vt
- ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
return true;
@@ -3591,8 +3986,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// offset. Be friendly and also accept a plain integer (without a leading
// hash) for gas compatibility.
if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar) ||
Parser.getTok().is(AsmToken::Integer)) {
- if (Parser.getTok().is(AsmToken::Hash))
+ if (Parser.getTok().isNot(AsmToken::Integer))
Parser.Lex(); // Eat the '#'.
E = Parser.getTok().getLoc();
@@ -3690,7 +4086,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
if (Tok.isNot(AsmToken::Identifier))
return true;
StringRef ShiftName = Tok.getString();
- if (ShiftName == "lsl" || ShiftName == "LSL")
+ if (ShiftName == "lsl" || ShiftName == "LSL" ||
+ ShiftName == "asl" || ShiftName == "ASL")
St = ARM_AM::lsl;
else if (ShiftName == "lsr" || ShiftName == "LSR")
St = ARM_AM::lsr;
@@ -3710,7 +4107,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
Loc = Parser.getTok().getLoc();
// A '#' and a shift amount.
const AsmToken &HashTok = Parser.getTok();
- if (HashTok.isNot(AsmToken::Hash))
+ if (HashTok.isNot(AsmToken::Hash) &&
+ HashTok.isNot(AsmToken::Dollar))
return Error(HashTok.getLoc(), "'#' expected");
Parser.Lex(); // Eat hash token.
@@ -3739,7 +4137,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
- if (Parser.getTok().isNot(AsmToken::Hash))
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar))
return MatchOperand_NoMatch;
// Disambiguate the VMOV forms that can accept an FP immediate.
@@ -3852,6 +4251,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
return parseMemory(Operands);
case AsmToken::LCurly:
return parseRegisterList(Operands);
+ case AsmToken::Dollar:
case AsmToken::Hash: {
// #42 -> immediate.
// TODO: ":lower16:" and ":upper16:" modifiers after # before immediate
@@ -3990,7 +4390,9 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "mrs" || Mnemonic == "smmls" || Mnemonic == "vabs" ||
Mnemonic == "vcls" || Mnemonic == "vmls" || Mnemonic == "vmrs" ||
Mnemonic == "vnmls" || Mnemonic == "vqabs" || Mnemonic == "vrecps" ||
- Mnemonic == "vrsqrts" || Mnemonic == "srs" ||
+ Mnemonic == "vrsqrts" || Mnemonic == "srs" || Mnemonic == "flds" ||
+ Mnemonic == "fmrs" || Mnemonic == "fsqrts" || Mnemonic == "fsubs" ||
+ Mnemonic == "fsts" ||
(Mnemonic == "movs" && isThumb()))) {
Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1);
CarrySetting = true;
@@ -4206,9 +4608,27 @@ static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) {
return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm");
}
+static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features);
/// Parse an arm instruction mnemonic followed by its operands.
bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Apply mnemonic aliases before doing anything else, as the destination
+ // mnemonic may include suffixes and we want to handle them normally.
+ // The generic tblgen'erated code does this later, at the start of
+ // MatchInstructionImpl(), but that's too late for aliases that include
+ // any sort of suffix.
+ unsigned AvailableFeatures = getAvailableFeatures();
+ applyMnemonicAliases(Name, AvailableFeatures);
+
+ // First check for the ARM-specific .req directive.
+ if (Parser.getTok().is(AsmToken::Identifier) &&
+ Parser.getTok().getIdentifier() == ".req") {
+ parseDirectiveReq(Name, NameLoc);
+ // We always return 'error' for this, as we're done with this
+ // statement and don't need to match the 'instruction'.
+ return true;
+ }
+
// Create the leading tokens for the mnemonic, split by '.' characters.
size_t Start = 0, Next = Name.find('.');
StringRef Mnemonic = Name.slice(Start, Next);
@@ -4400,12 +4820,21 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
}
}
// Similarly, the Thumb1 "RSB" instruction has a literal "#0" on the
- // end. Convert it to a token here.
+ // end. Convert it to a token here. Take care not to convert those
+ // that should hit the Thumb2 encoding.
if (Mnemonic == "rsb" && isThumb() && Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
static_cast<ARMOperand*>(Operands[5])->isImm()) {
ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]);
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
- if (CE && CE->getValue() == 0) {
+ if (CE && CE->getValue() == 0 &&
+ (isThumbOne() ||
+ // The cc_out operand matches the IT block.
+ ((inITBlock() != CarrySetting) &&
+ // Neither register operand is a high register.
+ (isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) &&
+ isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()))))){
Operands.erase(Operands.begin() + 5);
Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
delete Op;
@@ -4605,11 +5034,495 @@ validateInstruction(MCInst &Inst,
return false;
}
+/// Map a NEON VST1LN/VST2LN assembler pseudo-opcode (the "*_Asm_*" forms, one
+/// per data-type suffix) to the real instruction opcode for the matching
+/// element size (8, 16 or 32). Both the "WB_fixed" and "WB_register" pseudos
+/// map to the corresponding "_UPD" opcode; the non-writeback pseudos map to
+/// the plain opcode.
+static unsigned getRealVSTLNOpcode(unsigned Opc) {
+ switch(Opc) {
+ // Use llvm_unreachable rather than assert(0): when asserts are compiled
+ // out, assert(0) would fall through into the first case below and silently
+ // return ARM::VST1LNd8_UPD for an unknown opcode.
+ default: llvm_unreachable("unexpected opcode!");
+ // VST1LN
+ case ARM::VST1LNdWB_fixed_Asm_8: case ARM::VST1LNdWB_fixed_Asm_P8:
+ case ARM::VST1LNdWB_fixed_Asm_I8: case ARM::VST1LNdWB_fixed_Asm_S8:
+ case ARM::VST1LNdWB_fixed_Asm_U8:
+ return ARM::VST1LNd8_UPD;
+ case ARM::VST1LNdWB_fixed_Asm_16: case ARM::VST1LNdWB_fixed_Asm_P16:
+ case ARM::VST1LNdWB_fixed_Asm_I16: case ARM::VST1LNdWB_fixed_Asm_S16:
+ case ARM::VST1LNdWB_fixed_Asm_U16:
+ return ARM::VST1LNd16_UPD;
+ case ARM::VST1LNdWB_fixed_Asm_32: case ARM::VST1LNdWB_fixed_Asm_F:
+ case ARM::VST1LNdWB_fixed_Asm_F32: case ARM::VST1LNdWB_fixed_Asm_I32:
+ case ARM::VST1LNdWB_fixed_Asm_S32: case ARM::VST1LNdWB_fixed_Asm_U32:
+ return ARM::VST1LNd32_UPD;
+ case ARM::VST1LNdWB_register_Asm_8: case ARM::VST1LNdWB_register_Asm_P8:
+ case ARM::VST1LNdWB_register_Asm_I8: case ARM::VST1LNdWB_register_Asm_S8:
+ case ARM::VST1LNdWB_register_Asm_U8:
+ return ARM::VST1LNd8_UPD;
+ case ARM::VST1LNdWB_register_Asm_16: case ARM::VST1LNdWB_register_Asm_P16:
+ case ARM::VST1LNdWB_register_Asm_I16: case ARM::VST1LNdWB_register_Asm_S16:
+ case ARM::VST1LNdWB_register_Asm_U16:
+ return ARM::VST1LNd16_UPD;
+ case ARM::VST1LNdWB_register_Asm_32: case ARM::VST1LNdWB_register_Asm_F:
+ case ARM::VST1LNdWB_register_Asm_F32: case ARM::VST1LNdWB_register_Asm_I32:
+ case ARM::VST1LNdWB_register_Asm_S32: case ARM::VST1LNdWB_register_Asm_U32:
+ return ARM::VST1LNd32_UPD;
+ case ARM::VST1LNdAsm_8: case ARM::VST1LNdAsm_P8:
+ case ARM::VST1LNdAsm_I8: case ARM::VST1LNdAsm_S8:
+ case ARM::VST1LNdAsm_U8:
+ return ARM::VST1LNd8;
+ case ARM::VST1LNdAsm_16: case ARM::VST1LNdAsm_P16:
+ case ARM::VST1LNdAsm_I16: case ARM::VST1LNdAsm_S16:
+ case ARM::VST1LNdAsm_U16:
+ return ARM::VST1LNd16;
+ case ARM::VST1LNdAsm_32: case ARM::VST1LNdAsm_F:
+ case ARM::VST1LNdAsm_F32: case ARM::VST1LNdAsm_I32:
+ case ARM::VST1LNdAsm_S32: case ARM::VST1LNdAsm_U32:
+ return ARM::VST1LNd32;
+
+ // VST2LN
+ case ARM::VST2LNdWB_fixed_Asm_8: case ARM::VST2LNdWB_fixed_Asm_P8:
+ case ARM::VST2LNdWB_fixed_Asm_I8: case ARM::VST2LNdWB_fixed_Asm_S8:
+ case ARM::VST2LNdWB_fixed_Asm_U8:
+ return ARM::VST2LNd8_UPD;
+ case ARM::VST2LNdWB_fixed_Asm_16: case ARM::VST2LNdWB_fixed_Asm_P16:
+ case ARM::VST2LNdWB_fixed_Asm_I16: case ARM::VST2LNdWB_fixed_Asm_S16:
+ case ARM::VST2LNdWB_fixed_Asm_U16:
+ return ARM::VST2LNd16_UPD;
+ case ARM::VST2LNdWB_fixed_Asm_32: case ARM::VST2LNdWB_fixed_Asm_F:
+ case ARM::VST2LNdWB_fixed_Asm_F32: case ARM::VST2LNdWB_fixed_Asm_I32:
+ case ARM::VST2LNdWB_fixed_Asm_S32: case ARM::VST2LNdWB_fixed_Asm_U32:
+ return ARM::VST2LNd32_UPD;
+ case ARM::VST2LNdWB_register_Asm_8: case ARM::VST2LNdWB_register_Asm_P8:
+ case ARM::VST2LNdWB_register_Asm_I8: case ARM::VST2LNdWB_register_Asm_S8:
+ case ARM::VST2LNdWB_register_Asm_U8:
+ return ARM::VST2LNd8_UPD;
+ case ARM::VST2LNdWB_register_Asm_16: case ARM::VST2LNdWB_register_Asm_P16:
+ case ARM::VST2LNdWB_register_Asm_I16: case ARM::VST2LNdWB_register_Asm_S16:
+ case ARM::VST2LNdWB_register_Asm_U16:
+ return ARM::VST2LNd16_UPD;
+ case ARM::VST2LNdWB_register_Asm_32: case ARM::VST2LNdWB_register_Asm_F:
+ case ARM::VST2LNdWB_register_Asm_F32: case ARM::VST2LNdWB_register_Asm_I32:
+ case ARM::VST2LNdWB_register_Asm_S32: case ARM::VST2LNdWB_register_Asm_U32:
+ return ARM::VST2LNd32_UPD;
+ case ARM::VST2LNdAsm_8: case ARM::VST2LNdAsm_P8:
+ case ARM::VST2LNdAsm_I8: case ARM::VST2LNdAsm_S8:
+ case ARM::VST2LNdAsm_U8:
+ return ARM::VST2LNd8;
+ case ARM::VST2LNdAsm_16: case ARM::VST2LNdAsm_P16:
+ case ARM::VST2LNdAsm_I16: case ARM::VST2LNdAsm_S16:
+ case ARM::VST2LNdAsm_U16:
+ return ARM::VST2LNd16;
+ case ARM::VST2LNdAsm_32: case ARM::VST2LNdAsm_F:
+ case ARM::VST2LNdAsm_F32: case ARM::VST2LNdAsm_I32:
+ case ARM::VST2LNdAsm_S32: case ARM::VST2LNdAsm_U32:
+ return ARM::VST2LNd32;
+ }
+}
+
+/// Map a NEON VLD1LN/VLD2LN assembler pseudo-opcode (the "*_Asm_*" forms, one
+/// per data-type suffix) to the real instruction opcode for the matching
+/// element size (8, 16 or 32). Both the "WB_fixed" and "WB_register" pseudos
+/// map to the corresponding "_UPD" opcode; the non-writeback pseudos map to
+/// the plain opcode.
+static unsigned getRealVLDLNOpcode(unsigned Opc) {
+ switch(Opc) {
+ // Use llvm_unreachable rather than assert(0): when asserts are compiled
+ // out, assert(0) would fall through into the first case below and silently
+ // return ARM::VLD1LNd8_UPD for an unknown opcode.
+ default: llvm_unreachable("unexpected opcode!");
+ // VLD1LN
+ case ARM::VLD1LNdWB_fixed_Asm_8: case ARM::VLD1LNdWB_fixed_Asm_P8:
+ case ARM::VLD1LNdWB_fixed_Asm_I8: case ARM::VLD1LNdWB_fixed_Asm_S8:
+ case ARM::VLD1LNdWB_fixed_Asm_U8:
+ return ARM::VLD1LNd8_UPD;
+ case ARM::VLD1LNdWB_fixed_Asm_16: case ARM::VLD1LNdWB_fixed_Asm_P16:
+ case ARM::VLD1LNdWB_fixed_Asm_I16: case ARM::VLD1LNdWB_fixed_Asm_S16:
+ case ARM::VLD1LNdWB_fixed_Asm_U16:
+ return ARM::VLD1LNd16_UPD;
+ case ARM::VLD1LNdWB_fixed_Asm_32: case ARM::VLD1LNdWB_fixed_Asm_F:
+ case ARM::VLD1LNdWB_fixed_Asm_F32: case ARM::VLD1LNdWB_fixed_Asm_I32:
+ case ARM::VLD1LNdWB_fixed_Asm_S32: case ARM::VLD1LNdWB_fixed_Asm_U32:
+ return ARM::VLD1LNd32_UPD;
+ case ARM::VLD1LNdWB_register_Asm_8: case ARM::VLD1LNdWB_register_Asm_P8:
+ case ARM::VLD1LNdWB_register_Asm_I8: case ARM::VLD1LNdWB_register_Asm_S8:
+ case ARM::VLD1LNdWB_register_Asm_U8:
+ return ARM::VLD1LNd8_UPD;
+ case ARM::VLD1LNdWB_register_Asm_16: case ARM::VLD1LNdWB_register_Asm_P16:
+ case ARM::VLD1LNdWB_register_Asm_I16: case ARM::VLD1LNdWB_register_Asm_S16:
+ case ARM::VLD1LNdWB_register_Asm_U16:
+ return ARM::VLD1LNd16_UPD;
+ case ARM::VLD1LNdWB_register_Asm_32: case ARM::VLD1LNdWB_register_Asm_F:
+ case ARM::VLD1LNdWB_register_Asm_F32: case ARM::VLD1LNdWB_register_Asm_I32:
+ case ARM::VLD1LNdWB_register_Asm_S32: case ARM::VLD1LNdWB_register_Asm_U32:
+ return ARM::VLD1LNd32_UPD;
+ case ARM::VLD1LNdAsm_8: case ARM::VLD1LNdAsm_P8:
+ case ARM::VLD1LNdAsm_I8: case ARM::VLD1LNdAsm_S8:
+ case ARM::VLD1LNdAsm_U8:
+ return ARM::VLD1LNd8;
+ case ARM::VLD1LNdAsm_16: case ARM::VLD1LNdAsm_P16:
+ case ARM::VLD1LNdAsm_I16: case ARM::VLD1LNdAsm_S16:
+ case ARM::VLD1LNdAsm_U16:
+ return ARM::VLD1LNd16;
+ case ARM::VLD1LNdAsm_32: case ARM::VLD1LNdAsm_F:
+ case ARM::VLD1LNdAsm_F32: case ARM::VLD1LNdAsm_I32:
+ case ARM::VLD1LNdAsm_S32: case ARM::VLD1LNdAsm_U32:
+ return ARM::VLD1LNd32;
+
+ // VLD2LN
+ case ARM::VLD2LNdWB_fixed_Asm_8: case ARM::VLD2LNdWB_fixed_Asm_P8:
+ case ARM::VLD2LNdWB_fixed_Asm_I8: case ARM::VLD2LNdWB_fixed_Asm_S8:
+ case ARM::VLD2LNdWB_fixed_Asm_U8:
+ return ARM::VLD2LNd8_UPD;
+ case ARM::VLD2LNdWB_fixed_Asm_16: case ARM::VLD2LNdWB_fixed_Asm_P16:
+ case ARM::VLD2LNdWB_fixed_Asm_I16: case ARM::VLD2LNdWB_fixed_Asm_S16:
+ case ARM::VLD2LNdWB_fixed_Asm_U16:
+ return ARM::VLD2LNd16_UPD;
+ case ARM::VLD2LNdWB_fixed_Asm_32: case ARM::VLD2LNdWB_fixed_Asm_F:
+ case ARM::VLD2LNdWB_fixed_Asm_F32: case ARM::VLD2LNdWB_fixed_Asm_I32:
+ case ARM::VLD2LNdWB_fixed_Asm_S32: case ARM::VLD2LNdWB_fixed_Asm_U32:
+ return ARM::VLD2LNd32_UPD;
+ case ARM::VLD2LNdWB_register_Asm_8: case ARM::VLD2LNdWB_register_Asm_P8:
+ case ARM::VLD2LNdWB_register_Asm_I8: case ARM::VLD2LNdWB_register_Asm_S8:
+ case ARM::VLD2LNdWB_register_Asm_U8:
+ return ARM::VLD2LNd8_UPD;
+ case ARM::VLD2LNdWB_register_Asm_16: case ARM::VLD2LNdWB_register_Asm_P16:
+ case ARM::VLD2LNdWB_register_Asm_I16: case ARM::VLD2LNdWB_register_Asm_S16:
+ case ARM::VLD2LNdWB_register_Asm_U16:
+ return ARM::VLD2LNd16_UPD;
+ case ARM::VLD2LNdWB_register_Asm_32: case ARM::VLD2LNdWB_register_Asm_F:
+ case ARM::VLD2LNdWB_register_Asm_F32: case ARM::VLD2LNdWB_register_Asm_I32:
+ case ARM::VLD2LNdWB_register_Asm_S32: case ARM::VLD2LNdWB_register_Asm_U32:
+ return ARM::VLD2LNd32_UPD;
+ case ARM::VLD2LNdAsm_8: case ARM::VLD2LNdAsm_P8:
+ case ARM::VLD2LNdAsm_I8: case ARM::VLD2LNdAsm_S8:
+ case ARM::VLD2LNdAsm_U8:
+ return ARM::VLD2LNd8;
+ case ARM::VLD2LNdAsm_16: case ARM::VLD2LNdAsm_P16:
+ case ARM::VLD2LNdAsm_I16: case ARM::VLD2LNdAsm_S16:
+ case ARM::VLD2LNdAsm_U16:
+ return ARM::VLD2LNd16;
+ case ARM::VLD2LNdAsm_32: case ARM::VLD2LNdAsm_F:
+ case ARM::VLD2LNdAsm_F32: case ARM::VLD2LNdAsm_I32:
+ case ARM::VLD2LNdAsm_S32: case ARM::VLD2LNdAsm_U32:
+ return ARM::VLD2LNd32;
+ }
+}
+
bool ARMAsmParser::
processInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
switch (Inst.getOpcode()) {
- // Handle the MOV complex aliases.
+ // Handle NEON VST complex aliases.
+ case ARM::VST1LNdWB_register_Asm_8: case ARM::VST1LNdWB_register_Asm_P8:
+ case ARM::VST1LNdWB_register_Asm_I8: case ARM::VST1LNdWB_register_Asm_S8:
+ case ARM::VST1LNdWB_register_Asm_U8: case ARM::VST1LNdWB_register_Asm_16:
+ case ARM::VST1LNdWB_register_Asm_P16: case ARM::VST1LNdWB_register_Asm_I16:
+ case ARM::VST1LNdWB_register_Asm_S16: case ARM::VST1LNdWB_register_Asm_U16:
+ case ARM::VST1LNdWB_register_Asm_32: case ARM::VST1LNdWB_register_Asm_F:
+ case ARM::VST1LNdWB_register_Asm_F32: case ARM::VST1LNdWB_register_Asm_I32:
+ case ARM::VST1LNdWB_register_Asm_S32: case ARM::VST1LNdWB_register_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST2LNdWB_register_Asm_8: case ARM::VST2LNdWB_register_Asm_P8:
+ case ARM::VST2LNdWB_register_Asm_I8: case ARM::VST2LNdWB_register_Asm_S8:
+ case ARM::VST2LNdWB_register_Asm_U8: case ARM::VST2LNdWB_register_Asm_16:
+ case ARM::VST2LNdWB_register_Asm_P16: case ARM::VST2LNdWB_register_Asm_I16:
+ case ARM::VST2LNdWB_register_Asm_S16: case ARM::VST2LNdWB_register_Asm_U16:
+ case ARM::VST2LNdWB_register_Asm_32: case ARM::VST2LNdWB_register_Asm_F:
+ case ARM::VST2LNdWB_register_Asm_F32: case ARM::VST2LNdWB_register_Asm_I32:
+ case ARM::VST2LNdWB_register_Asm_S32: case ARM::VST2LNdWB_register_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::VST1LNdWB_fixed_Asm_8: case ARM::VST1LNdWB_fixed_Asm_P8:
+ case ARM::VST1LNdWB_fixed_Asm_I8: case ARM::VST1LNdWB_fixed_Asm_S8:
+ case ARM::VST1LNdWB_fixed_Asm_U8: case ARM::VST1LNdWB_fixed_Asm_16:
+ case ARM::VST1LNdWB_fixed_Asm_P16: case ARM::VST1LNdWB_fixed_Asm_I16:
+ case ARM::VST1LNdWB_fixed_Asm_S16: case ARM::VST1LNdWB_fixed_Asm_U16:
+ case ARM::VST1LNdWB_fixed_Asm_32: case ARM::VST1LNdWB_fixed_Asm_F:
+ case ARM::VST1LNdWB_fixed_Asm_F32: case ARM::VST1LNdWB_fixed_Asm_I32:
+ case ARM::VST1LNdWB_fixed_Asm_S32: case ARM::VST1LNdWB_fixed_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST2LNdWB_fixed_Asm_8: case ARM::VST2LNdWB_fixed_Asm_P8:
+ case ARM::VST2LNdWB_fixed_Asm_I8: case ARM::VST2LNdWB_fixed_Asm_S8:
+ case ARM::VST2LNdWB_fixed_Asm_U8: case ARM::VST2LNdWB_fixed_Asm_16:
+ case ARM::VST2LNdWB_fixed_Asm_P16: case ARM::VST2LNdWB_fixed_Asm_I16:
+ case ARM::VST2LNdWB_fixed_Asm_S16: case ARM::VST2LNdWB_fixed_Asm_U16:
+ case ARM::VST2LNdWB_fixed_Asm_32: case ARM::VST2LNdWB_fixed_Asm_F:
+ case ARM::VST2LNdWB_fixed_Asm_F32: case ARM::VST2LNdWB_fixed_Asm_I32:
+ case ARM::VST2LNdWB_fixed_Asm_S32: case ARM::VST2LNdWB_fixed_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::VST1LNdAsm_8: case ARM::VST1LNdAsm_P8: case ARM::VST1LNdAsm_I8:
+ case ARM::VST1LNdAsm_S8: case ARM::VST1LNdAsm_U8: case ARM::VST1LNdAsm_16:
+ case ARM::VST1LNdAsm_P16: case ARM::VST1LNdAsm_I16: case ARM::VST1LNdAsm_S16:
+ case ARM::VST1LNdAsm_U16: case ARM::VST1LNdAsm_32: case ARM::VST1LNdAsm_F:
+ case ARM::VST1LNdAsm_F32: case ARM::VST1LNdAsm_I32: case ARM::VST1LNdAsm_S32:
+ case ARM::VST1LNdAsm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST2LNdAsm_8: case ARM::VST2LNdAsm_P8: case ARM::VST2LNdAsm_I8:
+ case ARM::VST2LNdAsm_S8: case ARM::VST2LNdAsm_U8: case ARM::VST2LNdAsm_16:
+ case ARM::VST2LNdAsm_P16: case ARM::VST2LNdAsm_I16: case ARM::VST2LNdAsm_S16:
+ case ARM::VST2LNdAsm_U16: case ARM::VST2LNdAsm_32: case ARM::VST2LNdAsm_F:
+ case ARM::VST2LNdAsm_F32: case ARM::VST2LNdAsm_I32: case ARM::VST2LNdAsm_S32:
+ case ARM::VST2LNdAsm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+ // Handle NEON VLD complex aliases.
+ case ARM::VLD1LNdWB_register_Asm_8: case ARM::VLD1LNdWB_register_Asm_P8:
+ case ARM::VLD1LNdWB_register_Asm_I8: case ARM::VLD1LNdWB_register_Asm_S8:
+ case ARM::VLD1LNdWB_register_Asm_U8: case ARM::VLD1LNdWB_register_Asm_16:
+ case ARM::VLD1LNdWB_register_Asm_P16: case ARM::VLD1LNdWB_register_Asm_I16:
+ case ARM::VLD1LNdWB_register_Asm_S16: case ARM::VLD1LNdWB_register_Asm_U16:
+ case ARM::VLD1LNdWB_register_Asm_32: case ARM::VLD1LNdWB_register_Asm_F:
+ case ARM::VLD1LNdWB_register_Asm_F32: case ARM::VLD1LNdWB_register_Asm_I32:
+ case ARM::VLD1LNdWB_register_Asm_S32: case ARM::VLD1LNdWB_register_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD2LNdWB_register_Asm_8: case ARM::VLD2LNdWB_register_Asm_P8:
+ case ARM::VLD2LNdWB_register_Asm_I8: case ARM::VLD2LNdWB_register_Asm_S8:
+ case ARM::VLD2LNdWB_register_Asm_U8: case ARM::VLD2LNdWB_register_Asm_16:
+ case ARM::VLD2LNdWB_register_Asm_P16: case ARM::VLD2LNdWB_register_Asm_I16:
+ case ARM::VLD2LNdWB_register_Asm_S16: case ARM::VLD2LNdWB_register_Asm_U16:
+ case ARM::VLD2LNdWB_register_Asm_32: case ARM::VLD2LNdWB_register_Asm_F:
+ case ARM::VLD2LNdWB_register_Asm_F32: case ARM::VLD2LNdWB_register_Asm_I32:
+ case ARM::VLD2LNdWB_register_Asm_S32: case ARM::VLD2LNdWB_register_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD1LNdWB_fixed_Asm_8: case ARM::VLD1LNdWB_fixed_Asm_P8:
+ case ARM::VLD1LNdWB_fixed_Asm_I8: case ARM::VLD1LNdWB_fixed_Asm_S8:
+ case ARM::VLD1LNdWB_fixed_Asm_U8: case ARM::VLD1LNdWB_fixed_Asm_16:
+ case ARM::VLD1LNdWB_fixed_Asm_P16: case ARM::VLD1LNdWB_fixed_Asm_I16:
+ case ARM::VLD1LNdWB_fixed_Asm_S16: case ARM::VLD1LNdWB_fixed_Asm_U16:
+ case ARM::VLD1LNdWB_fixed_Asm_32: case ARM::VLD1LNdWB_fixed_Asm_F:
+ case ARM::VLD1LNdWB_fixed_Asm_F32: case ARM::VLD1LNdWB_fixed_Asm_I32:
+ case ARM::VLD1LNdWB_fixed_Asm_S32: case ARM::VLD1LNdWB_fixed_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD2LNdWB_fixed_Asm_8: case ARM::VLD2LNdWB_fixed_Asm_P8:
+ case ARM::VLD2LNdWB_fixed_Asm_I8: case ARM::VLD2LNdWB_fixed_Asm_S8:
+ case ARM::VLD2LNdWB_fixed_Asm_U8: case ARM::VLD2LNdWB_fixed_Asm_16:
+ case ARM::VLD2LNdWB_fixed_Asm_P16: case ARM::VLD2LNdWB_fixed_Asm_I16:
+ case ARM::VLD2LNdWB_fixed_Asm_S16: case ARM::VLD2LNdWB_fixed_Asm_U16:
+ case ARM::VLD2LNdWB_fixed_Asm_32: case ARM::VLD2LNdWB_fixed_Asm_F:
+ case ARM::VLD2LNdWB_fixed_Asm_F32: case ARM::VLD2LNdWB_fixed_Asm_I32:
+ case ARM::VLD2LNdWB_fixed_Asm_S32: case ARM::VLD2LNdWB_fixed_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD1LNdAsm_8: case ARM::VLD1LNdAsm_P8: case ARM::VLD1LNdAsm_I8:
+ case ARM::VLD1LNdAsm_S8: case ARM::VLD1LNdAsm_U8: case ARM::VLD1LNdAsm_16:
+ case ARM::VLD1LNdAsm_P16: case ARM::VLD1LNdAsm_I16: case ARM::VLD1LNdAsm_S16:
+ case ARM::VLD1LNdAsm_U16: case ARM::VLD1LNdAsm_32: case ARM::VLD1LNdAsm_F:
+ case ARM::VLD1LNdAsm_F32: case ARM::VLD1LNdAsm_I32: case ARM::VLD1LNdAsm_S32:
+ case ARM::VLD1LNdAsm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD2LNdAsm_8: case ARM::VLD2LNdAsm_P8: case ARM::VLD2LNdAsm_I8:
+ case ARM::VLD2LNdAsm_S8: case ARM::VLD2LNdAsm_U8: case ARM::VLD2LNdAsm_16:
+ case ARM::VLD2LNdAsm_P16: case ARM::VLD2LNdAsm_I16: case ARM::VLD2LNdAsm_S16:
+ case ARM::VLD2LNdAsm_U16: case ARM::VLD2LNdAsm_32: case ARM::VLD2LNdAsm_F:
+ case ARM::VLD2LNdAsm_F32: case ARM::VLD2LNdAsm_I32: case ARM::VLD2LNdAsm_S32:
+ case ARM::VLD2LNdAsm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+ // Handle the Thumb2 mode MOV complex aliases.
+ case ARM::t2MOVsi:
+ case ARM::t2MOVSsi: {
+ // Which instruction to expand to depends on the CCOut operand and
+ // whether we're in an IT block if the register operands are low
+ // registers.
+ bool isNarrow = false;
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ inITBlock() == (Inst.getOpcode() == ARM::t2MOVsi))
+ isNarrow = true;
+ MCInst TmpInst;
+ unsigned newOpc;
+ switch(ARM_AM::getSORegShOp(Inst.getOperand(2).getImm())) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM_AM::asr: newOpc = isNarrow ? ARM::tASRri : ARM::t2ASRri; break;
+ case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRri : ARM::t2LSRri; break;
+ case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLri : ARM::t2LSLri; break;
+ case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break;
+ }
+ unsigned Ammount = ARM_AM::getSORegOffset(Inst.getOperand(2).getImm());
+ if (Ammount == 32) Ammount = 0;
+ TmpInst.setOpcode(newOpc);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ if (isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(MCOperand::CreateImm(Ammount));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ if (!isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0));
+ Inst = TmpInst;
+ return true;
+ }
+ // Handle the ARM mode MOV complex aliases.
case ARM::ASRr:
case ARM::LSRr:
case ARM::LSLr:
@@ -4743,6 +5656,24 @@ processInstruction(MCInst &Inst,
Inst = TmpInst;
}
break;
+ case ARM::t2ADDri12:
+ // If the immediate fits for encoding T3 (t2ADDri) and the generic "add"
+ // mnemonic was used (not "addw"), encoding T3 is preferred.
+ if (static_cast<ARMOperand*>(Operands[0])->getToken() != "add" ||
+ ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
+ break;
+ Inst.setOpcode(ARM::t2ADDri);
+ Inst.addOperand(MCOperand::CreateReg(0)); // cc_out
+ break;
+ case ARM::t2SUBri12:
+ // If the immediate fits for encoding T3 (t2SUBri) and the generic "sub"
+ // mnemonic was used (not "subw"), encoding T3 is preferred.
+ if (static_cast<ARMOperand*>(Operands[0])->getToken() != "sub" ||
+ ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
+ break;
+ Inst.setOpcode(ARM::t2SUBri);
+ Inst.addOperand(MCOperand::CreateReg(0)); // cc_out
+ break;
case ARM::tADDi8:
// If the immediate is in the range 0-7, we want tADDi3 iff Rd was
// explicitly specified. From the ARM ARM: "Encoding T1 is preferred
@@ -4763,6 +5694,26 @@ processInstruction(MCInst &Inst,
return true;
}
break;
+ case ARM::t2ADDrr: {
+ // If the destination and first source operand are the same, and
+ // there's no setting of the flags, use encoding T2 instead of T3.
+ // Note that this is only for ADD, not SUB. This mirrors the system
+ // 'as' behaviour. Make sure the wide encoding wasn't explicit.
+ if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() ||
+ Inst.getOperand(5).getReg() != 0 ||
+ (static_cast<ARMOperand*>(Operands[3])->isToken() &&
+ static_cast<ARMOperand*>(Operands[3])->getToken() == ".w"))
+ break;
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tADDhirr);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
case ARM::tB:
// A Thumb conditional branch outside of an IT block is a tBcc.
if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()) {
@@ -5079,12 +6030,16 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveWord(4, DirectiveID.getLoc());
else if (IDVal == ".thumb")
return parseDirectiveThumb(DirectiveID.getLoc());
+ else if (IDVal == ".arm")
+ return parseDirectiveARM(DirectiveID.getLoc());
else if (IDVal == ".thumb_func")
return parseDirectiveThumbFunc(DirectiveID.getLoc());
else if (IDVal == ".code")
return parseDirectiveCode(DirectiveID.getLoc());
else if (IDVal == ".syntax")
return parseDirectiveSyntax(DirectiveID.getLoc());
+ else if (IDVal == ".unreq")
+ return parseDirectiveUnreq(DirectiveID.getLoc());
return true;
}
@@ -5120,9 +6075,22 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
return Error(L, "unexpected token in directive");
Parser.Lex();
- // TODO: set thumb mode
- // TODO: tell the MC streamer the mode
- // getParser().getStreamer().Emit???();
+ if (!isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
+ return false;
+}
+
+/// parseDirectiveARM
+/// ::= .arm
+///
+/// Handles the ".arm" directive. Reports an error at L if anything other
+/// than end-of-statement follows the directive; otherwise consumes the
+/// end-of-statement token, calls SwitchMode() when isThumb() is true, and
+/// emits the MCAF_Code32 assembler flag to the streamer.
+/// Returns false on success, or the result of Error() on trailing input.
+bool ARMAsmParser::parseDirectiveARM(SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+
+ // Only toggle the mode when we are currently in Thumb mode.
+ if (isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
return false;
}
@@ -5212,6 +6180,45 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
return false;
}
+/// parseDirectiveReq
+/// ::= name .req registername
+bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
+ Parser.Lex(); // Eat the '.req' token.
+ unsigned Reg;
+ SMLoc SRegLoc, ERegLoc;
+ if (ParseRegister(Reg, SRegLoc, ERegLoc)) {
+ Parser.EatToEndOfStatement();
+ return Error(SRegLoc, "register name expected");
+ }
+
+ // Shouldn't be anything else.
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
+ Parser.EatToEndOfStatement();
+ return Error(Parser.getTok().getLoc(),
+ "unexpected input in .req directive.");
+ }
+
+ Parser.Lex(); // Consume the EndOfStatement
+
+ if (RegisterReqs.GetOrCreateValue(Name, Reg).getValue() != Reg)
+ return Error(SRegLoc, "redefinition of '" + Name +
+ "' does not match original.");
+
+ return false;
+}
+
+/// parseDirectiveUnreq
+/// ::= .unreq name
+bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) {
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Parser.EatToEndOfStatement();
+ return Error(L, "unexpected input in .unreq directive.");
+ }
+ RegisterReqs.erase(Parser.getTok().getIdentifier());
+ Parser.Lex(); // Eat the identifier.
+ return false;
+}
+
extern "C" void LLVMInitializeARMAsmLexer();
/// Force static initialization.
diff --git a/lib/Target/ARM/AsmParser/CMakeLists.txt b/lib/Target/ARM/AsmParser/CMakeLists.txt
index 3f5ad39..e24a1b1 100644
--- a/lib/Target/ARM/AsmParser/CMakeLists.txt
+++ b/lib/Target/ARM/AsmParser/CMakeLists.txt
@@ -6,11 +6,3 @@ add_llvm_library(LLVMARMAsmParser
)
add_dependencies(LLVMARMAsmParser ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMAsmParser
- LLVMARMDesc
- LLVMARMInfo
- LLVMMC
- LLVMMCParser
- LLVMSupport
- )
diff --git a/lib/Target/ARM/AsmParser/LLVMBuild.txt b/lib/Target/ARM/AsmParser/LLVMBuild.txt
index cbf9b4b..f0184b6 100644
--- a/lib/Target/ARM/AsmParser/LLVMBuild.txt
+++ b/lib/Target/ARM/AsmParser/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = ARMAsmParser
parent = ARM
required_libraries = ARMDesc ARMInfo MC MCParser Support
add_to_library_groups = ARM
-
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 511932e..04cdf55 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -48,20 +48,6 @@ add_llvm_target(ARMCodeGen
Thumb2SizeReduction.cpp
)
-add_llvm_library_dependencies(LLVMARMCodeGen
- LLVMARMAsmPrinter
- LLVMARMDesc
- LLVMARMInfo
- LLVMAnalysis
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
# workaround for hanging compilation on MSVC9, 10
if( MSVC_VERSION EQUAL 1600 OR MSVC_VERSION EQUAL 1500 )
set_property(
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index ad250ab..49c64fd 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -2085,15 +2085,24 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
case ARM::VLD1d32Qwb_register:
case ARM::VLD1d64Qwb_fixed:
case ARM::VLD1d64Qwb_register:
- case ARM::VLD2d8_UPD:
- case ARM::VLD2d16_UPD:
- case ARM::VLD2d32_UPD:
- case ARM::VLD2q8_UPD:
- case ARM::VLD2q16_UPD:
- case ARM::VLD2q32_UPD:
- case ARM::VLD2b8_UPD:
- case ARM::VLD2b16_UPD:
- case ARM::VLD2b32_UPD:
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2q8wb_fixed:
+ case ARM::VLD2q16wb_fixed:
+ case ARM::VLD2q32wb_fixed:
+ case ARM::VLD2d8wb_register:
+ case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_register:
+ case ARM::VLD2q8wb_register:
+ case ARM::VLD2q16wb_register:
+ case ARM::VLD2q32wb_register:
+ case ARM::VLD2b8wb_fixed:
+ case ARM::VLD2b16wb_fixed:
+ case ARM::VLD2b32wb_fixed:
+ case ARM::VLD2b8wb_register:
+ case ARM::VLD2b16wb_register:
+ case ARM::VLD2b32wb_register:
case ARM::VLD3d8_UPD:
case ARM::VLD3d16_UPD:
case ARM::VLD3d32_UPD:
@@ -2196,23 +2205,40 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
case ARM::VST1q16wb_register:
case ARM::VST1q32wb_register:
case ARM::VST1q64wb_register:
- case ARM::VST1d8T_UPD:
- case ARM::VST1d16T_UPD:
- case ARM::VST1d32T_UPD:
- case ARM::VST1d64T_UPD:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2d8_UPD:
- case ARM::VST2d16_UPD:
- case ARM::VST2d32_UPD:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
- case ARM::VST2b8_UPD:
- case ARM::VST2b16_UPD:
- case ARM::VST2b32_UPD:
+ case ARM::VST1d8Twb_fixed:
+ case ARM::VST1d16Twb_fixed:
+ case ARM::VST1d32Twb_fixed:
+ case ARM::VST1d64Twb_fixed:
+ case ARM::VST1d8Twb_register:
+ case ARM::VST1d16Twb_register:
+ case ARM::VST1d32Twb_register:
+ case ARM::VST1d64Twb_register:
+ case ARM::VST1d8Qwb_fixed:
+ case ARM::VST1d16Qwb_fixed:
+ case ARM::VST1d32Qwb_fixed:
+ case ARM::VST1d64Qwb_fixed:
+ case ARM::VST1d8Qwb_register:
+ case ARM::VST1d16Qwb_register:
+ case ARM::VST1d32Qwb_register:
+ case ARM::VST1d64Qwb_register:
+ case ARM::VST2d8wb_fixed:
+ case ARM::VST2d16wb_fixed:
+ case ARM::VST2d32wb_fixed:
+ case ARM::VST2d8wb_register:
+ case ARM::VST2d16wb_register:
+ case ARM::VST2d32wb_register:
+ case ARM::VST2q8wb_fixed:
+ case ARM::VST2q16wb_fixed:
+ case ARM::VST2q32wb_fixed:
+ case ARM::VST2q8wb_register:
+ case ARM::VST2q16wb_register:
+ case ARM::VST2q32wb_register:
+ case ARM::VST2b8wb_fixed:
+ case ARM::VST2b16wb_fixed:
+ case ARM::VST2b32wb_fixed:
+ case ARM::VST2b8wb_register:
+ case ARM::VST2b16wb_register:
+ case ARM::VST2b32wb_register:
case ARM::VST3d8_UPD:
case ARM::VST3d16_UPD:
case ARM::VST3d32_UPD:
@@ -2264,34 +2290,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
// Second input register
switch (Inst.getOpcode()) {
- case ARM::VST1d8T:
- case ARM::VST1d16T:
- case ARM::VST1d32T:
- case ARM::VST1d64T:
- case ARM::VST1d8T_UPD:
- case ARM::VST1d16T_UPD:
- case ARM::VST1d32T_UPD:
- case ARM::VST1d64T_UPD:
- case ARM::VST1d8Q:
- case ARM::VST1d16Q:
- case ARM::VST1d32Q:
- case ARM::VST1d64Q:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2d8:
- case ARM::VST2d16:
- case ARM::VST2d32:
- case ARM::VST2d8_UPD:
- case ARM::VST2d16_UPD:
- case ARM::VST2d32_UPD:
- case ARM::VST2q8:
- case ARM::VST2q16:
- case ARM::VST2q32:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
case ARM::VST3d8:
case ARM::VST3d16:
case ARM::VST3d32:
@@ -2307,12 +2305,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
return MCDisassembler::Fail;
break;
- case ARM::VST2b8:
- case ARM::VST2b16:
- case ARM::VST2b32:
- case ARM::VST2b8_UPD:
- case ARM::VST2b16_UPD:
- case ARM::VST2b32_UPD:
case ARM::VST3q8:
case ARM::VST3q16:
case ARM::VST3q32:
@@ -2334,28 +2326,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
// Third input register
switch (Inst.getOpcode()) {
- case ARM::VST1d8T:
- case ARM::VST1d16T:
- case ARM::VST1d32T:
- case ARM::VST1d64T:
- case ARM::VST1d8T_UPD:
- case ARM::VST1d16T_UPD:
- case ARM::VST1d32T_UPD:
- case ARM::VST1d64T_UPD:
- case ARM::VST1d8Q:
- case ARM::VST1d16Q:
- case ARM::VST1d32Q:
- case ARM::VST1d64Q:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2q8:
- case ARM::VST2q16:
- case ARM::VST2q32:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
case ARM::VST3d8:
case ARM::VST3d16:
case ARM::VST3d32:
@@ -2392,20 +2362,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
// Fourth input register
switch (Inst.getOpcode()) {
- case ARM::VST1d8Q:
- case ARM::VST1d16Q:
- case ARM::VST1d32Q:
- case ARM::VST1d64Q:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2q8:
- case ARM::VST2q16:
- case ARM::VST2q32:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
case ARM::VST4d8:
case ARM::VST4d16:
case ARM::VST4d32:
@@ -2441,16 +2397,11 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn,
unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
unsigned align = fieldFromInstruction32(Insn, 4, 1);
unsigned size = fieldFromInstruction32(Insn, 6, 2);
- unsigned regs = fieldFromInstruction32(Insn, 5, 1) + 1;
align *= (1 << size);
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
- if (regs == 2) {
- if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
- return MCDisassembler::Fail;
- }
if (Rm != 0xF) {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2460,12 +2411,12 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::CreateImm(align));
- if (Rm == 0xD)
- Inst.addOperand(MCOperand::CreateReg(0));
- else if (Rm != 0xF) {
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
- return MCDisassembler::Fail;
- }
+ // The fixed offset post-increment encodes Rm == 0xd. The no-writeback
+ // variant encodes Rm == 0xf. Anything else is a register offset post-
+ // increment and we need to add the register operand to the instruction.
+ if (Rm != 0xD && Rm != 0xF &&
+ !Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
return S;
}
@@ -2693,7 +2644,6 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
unsigned op = fieldFromInstruction32(Insn, 6, 1);
- unsigned length = fieldFromInstruction32(Insn, 8, 2) + 1;
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2702,10 +2652,8 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail; // Writeback
}
- for (unsigned i = 0; i < length; ++i) {
- if (!Check(S, DecodeDPRRegisterClass(Inst, (Rn+i)%32, Address, Decoder)))
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
- }
if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
@@ -4138,4 +4086,3 @@ static DecodeStatus DecodeVCVTQ(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-
diff --git a/lib/Target/ARM/Disassembler/CMakeLists.txt b/lib/Target/ARM/Disassembler/CMakeLists.txt
index da87751..9de6e5c 100644
--- a/lib/Target/ARM/Disassembler/CMakeLists.txt
+++ b/lib/Target/ARM/Disassembler/CMakeLists.txt
@@ -11,11 +11,3 @@ set_property(
)
endif()
add_dependencies(LLVMARMDisassembler ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMDisassembler
- LLVMARMCodeGen
- LLVMARMDesc
- LLVMARMInfo
- LLVMMC
- LLVMSupport
- )
diff --git a/lib/Target/ARM/Disassembler/LLVMBuild.txt b/lib/Target/ARM/Disassembler/LLVMBuild.txt
index baa9bc3..94075a9 100644
--- a/lib/Target/ARM/Disassembler/LLVMBuild.txt
+++ b/lib/Target/ARM/Disassembler/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = ARMDisassembler
parent = ARM
required_libraries = ARMCodeGen ARMDesc ARMInfo MC Support
add_to_library_groups = ARM
-
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 6c6c021..662097a 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -1029,3 +1029,29 @@ void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum,
<< getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", "
<< getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "}";
}
+
+void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "}";
+}
+
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 3f38f1a..05db2d2 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -133,6 +133,12 @@ public:
void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
};
} // end namespace llvm
diff --git a/lib/Target/ARM/InstPrinter/CMakeLists.txt b/lib/Target/ARM/InstPrinter/CMakeLists.txt
index fa0b495..e2d4819 100644
--- a/lib/Target/ARM/InstPrinter/CMakeLists.txt
+++ b/lib/Target/ARM/InstPrinter/CMakeLists.txt
@@ -5,8 +5,3 @@ add_llvm_library(LLVMARMAsmPrinter
)
add_dependencies(LLVMARMAsmPrinter ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMAsmPrinter
- LLVMMC
- LLVMSupport
- )
diff --git a/lib/Target/ARM/InstPrinter/LLVMBuild.txt b/lib/Target/ARM/InstPrinter/LLVMBuild.txt
index b34aab4..6f4fa36 100644
--- a/lib/Target/ARM/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/ARM/InstPrinter/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = ARMAsmPrinter
parent = ARM
required_libraries = MC Support
add_to_library_groups = ARM
-
diff --git a/lib/Target/ARM/LLVMBuild.txt b/lib/Target/ARM/LLVMBuild.txt
index 9082539..fd4b3a3 100644
--- a/lib/Target/ARM/LLVMBuild.txt
+++ b/lib/Target/ARM/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = ARM
@@ -30,4 +33,3 @@ name = ARMCodeGen
parent = ARM
required_libraries = ARMAsmPrinter ARMDesc ARMInfo Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target
add_to_library_groups = ARM
-
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 62d04c4..bf1f0e8 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -102,6 +102,11 @@ public:
bool MayNeedRelaxation(const MCInst &Inst) const;
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const;
+
void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
@@ -124,14 +129,49 @@ public:
};
} // end anonymous namespace
+static unsigned getRelaxedOpcode(unsigned Op) {
+ switch (Op) {
+ default: return Op;
+ case ARM::tBcc: return ARM::t2Bcc;
+ }
+}
+
bool ARMAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
- // FIXME: Thumb targets, different move constant targets..
+ if (getRelaxedOpcode(Inst.getOpcode()) != Inst.getOpcode())
+ return true;
return false;
}
+bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // Relaxing tBcc to t2Bcc. tBcc has a signed 9-bit displacement with the
+ // low bit being an implied zero. There's an implied +4 offset for the
+ // branch, so we adjust the other way here to determine what's
+ // encodable.
+ //
+ // Relax if the value is too big for a (signed) i8.
+ int64_t Offset = int64_t(Value) - 4;
+ return Offset > 254 || Offset < -256;
+}
+
void ARMAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
- assert(0 && "ARMAsmBackend::RelaxInstruction() unimplemented");
- return;
+ unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode());
+
+ // Sanity check w/ diagnostic if we get here w/ a bogus instruction.
+ if (RelaxedOp == Inst.getOpcode()) {
+ SmallString<256> Tmp;
+ raw_svector_ostream OS(Tmp);
+ Inst.dump_pretty(OS);
+ OS << "\n";
+ report_fatal_error("unexpected instruction to relax: " + OS.str());
+ }
+
+ // The instructions we're relaxing have (so far) the same operands.
+ // We just need to update to the proper opcode.
+ Res = Inst;
+ Res.setOpcode(RelaxedOp);
}
bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 865c3e2..c38a882 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -1412,7 +1412,7 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand &MO = MI.getOperand(Op);
if (MO.getReg() == 0) return 0x0D;
- return MO.getReg();
+ return getARMRegisterNumbering(MO.getReg());
}
unsigned ARMMCCodeEmitter::
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 352c73e..f394b4f 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -16,6 +16,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCMachOSymbolFlags.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ErrorHandling.h"
@@ -178,9 +179,16 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
case ARM::fixup_arm_movt_hi16:
case ARM::fixup_arm_movt_hi16_pcrel:
MovtBit = 1;
+ // The thumb bit shouldn't be set in the 'other-half' bit of the
+ // relocation, but it will be set in FixedValue if the base symbol
+ // is a thumb function. Clear it out here.
+ if (A_SD->getFlags() & SF_ThumbFunc)
+ FixedValue &= 0xfffffffe;
break;
case ARM::fixup_t2_movt_hi16:
case ARM::fixup_t2_movt_hi16_pcrel:
+ if (A_SD->getFlags() & SF_ThumbFunc)
+ FixedValue &= 0xfffffffe;
MovtBit = 1;
// Fallthrough
case ARM::fixup_t2_movw_lo16:
@@ -189,7 +197,6 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
break;
}
-
if (Type == macho::RIT_ARM_HalfDifference) {
uint32_t OtherHalf = MovtBit
? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16);
diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
index f529314..f2cf78a 100644
--- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
@@ -10,10 +10,3 @@ add_dependencies(LLVMARMDesc ARMCommonTableGen)
# Hack: we need to include 'main' target directory to grab private headers
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
-
-add_llvm_library_dependencies(LLVMARMDesc
- LLVMARMAsmPrinter
- LLVMARMInfo
- LLVMMC
- LLVMSupport
- )
diff --git a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt
index 46b11c7..2a7fe61 100644
--- a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = ARMDesc
parent = ARM
required_libraries = ARMAsmPrinter ARMInfo MC Support
add_to_library_groups = ARM
-
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index 2df0053..000a37f 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -139,7 +139,7 @@ bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
// FIXME: Detect integer instructions properly.
const MCInstrDesc &MCID = MI->getDesc();
unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
- if (MCID.mayStore())
+ if (MI->mayStore())
return false;
unsigned Opcode = MCID.getOpcode();
if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
@@ -222,14 +222,14 @@ MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
const MCInstrDesc &MCID2 = TII->get(AddSubOpc);
unsigned TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI));
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID1, TmpReg)
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg)
.addReg(Src1Reg, getKillRegState(Src1Kill))
.addReg(Src2Reg, getKillRegState(Src2Kill));
if (HasLane)
MIB.addImm(LaneImm);
MIB.addImm(Pred).addReg(PredReg);
- MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID2)
+ MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID2)
.addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead));
if (NegAcc) {
@@ -274,7 +274,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
}
const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.isBarrier()) {
+ if (MI->isBarrier()) {
clearStack();
Skip = 0;
++MII;
diff --git a/lib/Target/ARM/TargetInfo/CMakeLists.txt b/lib/Target/ARM/TargetInfo/CMakeLists.txt
index 8b38b13..533e747 100644
--- a/lib/Target/ARM/TargetInfo/CMakeLists.txt
+++ b/lib/Target/ARM/TargetInfo/CMakeLists.txt
@@ -5,9 +5,3 @@ add_llvm_library(LLVMARMInfo
)
add_dependencies(LLVMARMInfo ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
diff --git a/lib/Target/ARM/TargetInfo/LLVMBuild.txt b/lib/Target/ARM/TargetInfo/LLVMBuild.txt
index 046c1fc..a07a940 100644
--- a/lib/Target/ARM/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/ARM/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = ARMInfo
parent = ARM
required_libraries = MC Support Target
add_to_library_groups = ARM
-
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index e8ed482..e61c0a7 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -643,14 +643,13 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
assert(Offset && "This code isn't needed if offset already handled!");
unsigned Opcode = MI.getOpcode();
- const MCInstrDesc &Desc = MI.getDesc();
// Remove predicate first.
int PIdx = MI.findFirstPredOperandIdx();
if (PIdx != -1)
removeOperands(MI, PIdx);
- if (Desc.mayLoad()) {
+ if (MI.mayLoad()) {
// Use the destination register to materialize sp + offset.
unsigned TmpReg = MI.getOperand(0).getReg();
bool UseRR = false;
@@ -673,7 +672,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
// register. The offset is already handled in the vreg value.
MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
- } else if (Desc.mayStore()) {
+ } else if (MI.mayStore()) {
VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
bool UseRR = false;
@@ -699,7 +698,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
// Add predicate back if it's needed.
- if (MI.getDesc().isPredicable()) {
+ if (MI.isPredicable()) {
MachineInstrBuilder MIB(&MI);
AddDefaultPred(MIB);
}
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index b627400..55b4d30 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -13,6 +13,7 @@
#include "Thumb2InstrInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
@@ -141,7 +142,7 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
// rsb r2, 0
//
const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.hasOptionalDef() &&
+ if (MI->hasOptionalDef() &&
MI->getOperand(MCID.getNumOperands() - 1).getReg() == ARM::CPSR)
return false;
@@ -198,7 +199,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
// Branches, including tricky ones like LDM_RET, need to end an IT
// block so check the instruction we just put in the block.
for (; MBBI != E && Pos &&
- (!MI->getDesc().isBranch() && !MI->getDesc().isReturn()) ; ++MBBI) {
+ (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) {
if (MBBI->isDebugValue())
continue;
@@ -237,6 +238,9 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
// Last instruction in IT block kills ITSTATE.
LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();
+ // Finalize the bundle.
+ FinalizeBundle(MBB, InsertPos.getInstrIterator(), LastITMI);
+
Modified = true;
++NumITs;
}
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index e5fc8b4..e206288 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -452,7 +452,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit load / store instruction.
DebugLoc dl = MI->getDebugLoc();
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc));
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));
if (!isLdStMul) {
MIB.addOperand(MI->getOperand(0));
MIB.addOperand(MI->getOperand(1));
@@ -478,7 +478,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
- MBB.erase(MI);
+ MBB.erase_instr(MI);
++NumLdSts;
return true;
}
@@ -513,7 +513,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
return false;
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(),
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
TII->get(ARM::tADDrSPi))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
@@ -525,7 +525,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " <<*MIB);
- MBB.erase(MI);
+ MBB.erase_instr(MI);
++NumNarrows;
return true;
}
@@ -533,8 +533,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
if (Entry.LowRegs1 && !VerifyLowRegs(MI))
return false;
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.mayLoad() || MCID.mayStore())
+ if (MI->mayLoad() || MI->mayStore())
return ReduceLoadStore(MBB, MI, Entry);
switch (Opc) {
@@ -654,7 +653,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc();
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
MIB.addOperand(MI->getOperand(0));
if (NewMCID.hasOptionalDef()) {
if (HasCC)
@@ -678,7 +677,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
- MBB.erase(MI);
+ MBB.erase_instr(MI);
++Num2Addrs;
return true;
}
@@ -745,7 +744,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc();
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
MIB.addOperand(MI->getOperand(0));
if (NewMCID.hasOptionalDef()) {
if (HasCC)
@@ -785,7 +784,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
- MBB.erase(MI);
+ MBB.erase_instr(MI);
++NumNarrows;
return true;
}
@@ -830,16 +829,22 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
// Yes, CPSR could be livein.
bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
MachineInstr *CPSRDef = 0;
+ MachineInstr *BundleMI = 0;
// If this BB loops back to itself, conservatively avoid narrowing the
// first instruction that does partial flag update.
bool IsSelfLoop = MBB.isSuccessor(&MBB);
- MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
- MachineBasicBlock::iterator NextMII;
+ MachineBasicBlock::instr_iterator MII = MBB.instr_begin(), E = MBB.instr_end();
+ MachineBasicBlock::instr_iterator NextMII;
for (; MII != E; MII = NextMII) {
NextMII = llvm::next(MII);
MachineInstr *MI = &*MII;
+ if (MI->isBundle()) {
+ BundleMI = MI;
+ continue;
+ }
+
LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
unsigned Opcode = MI->getOpcode();
@@ -850,7 +855,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
if (Entry.Special) {
if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
Modified = true;
- MachineBasicBlock::iterator I = prior(NextMII);
+ MachineBasicBlock::instr_iterator I = prior(NextMII);
MI = &*I;
}
goto ProcessNext;
@@ -860,7 +865,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
if (Entry.NarrowOpc2 &&
ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
Modified = true;
- MachineBasicBlock::iterator I = prior(NextMII);
+ MachineBasicBlock::instr_iterator I = prior(NextMII);
MI = &*I;
goto ProcessNext;
}
@@ -869,15 +874,24 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
if (Entry.NarrowOpc1 &&
ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
Modified = true;
- MachineBasicBlock::iterator I = prior(NextMII);
+ MachineBasicBlock::instr_iterator I = prior(NextMII);
MI = &*I;
}
}
ProcessNext:
+ if (LiveCPSR &&
+ NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle() &&
+ BundleMI->killsRegister(ARM::CPSR))
+ // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
+ // marker is only on the BUNDLE instruction. Process the BUNDLE
+ // instruction as we finish with the bundled instruction to work around
+ // the inconsistency.
+ LiveCPSR = false;
+
bool DefCPSR = false;
LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
- if (MI->getDesc().isCall()) {
+ if (MI->isCall()) {
// Calls don't really set CPSR.
CPSRDef = 0;
IsSelfLoop = false;
diff --git a/lib/Target/CBackend/CMakeLists.txt b/lib/Target/CBackend/CMakeLists.txt
index edf8ee7..fa819a4 100644
--- a/lib/Target/CBackend/CMakeLists.txt
+++ b/lib/Target/CBackend/CMakeLists.txt
@@ -2,16 +2,4 @@ add_llvm_target(CBackendCodeGen
CBackend.cpp
)
-add_llvm_library_dependencies(LLVMCBackendCodeGen
- LLVMAnalysis
- LLVMCBackendInfo
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMScalarOpts
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- )
-
add_subdirectory(TargetInfo)
diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h
index ca346af..8b2286e 100644
--- a/lib/Target/CBackend/CTargetMachine.h
+++ b/lib/Target/CBackend/CTargetMachine.h
@@ -21,10 +21,10 @@ namespace llvm {
struct CTargetMachine : public TargetMachine {
CTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : TargetMachine(T, TT, CPU, FS) {}
+ : TargetMachine(T, TT, CPU, FS, Options) { }
virtual bool addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &Out,
diff --git a/lib/Target/CBackend/LLVMBuild.txt b/lib/Target/CBackend/LLVMBuild.txt
index 851ded9..e64feb0 100644
--- a/lib/Target/CBackend/LLVMBuild.txt
+++ b/lib/Target/CBackend/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = TargetInfo
+
[component_0]
type = TargetGroup
name = CBackend
@@ -26,4 +29,3 @@ name = CBackendCodeGen
parent = CBackend
required_libraries = Analysis CBackendInfo CodeGen Core MC Scalar Support Target TransformUtils
add_to_library_groups = CBackend
-
diff --git a/lib/Target/CBackend/TargetInfo/CMakeLists.txt b/lib/Target/CBackend/TargetInfo/CMakeLists.txt
index 8e616be..6203616 100644
--- a/lib/Target/CBackend/TargetInfo/CMakeLists.txt
+++ b/lib/Target/CBackend/TargetInfo/CMakeLists.txt
@@ -3,9 +3,3 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
add_llvm_library(LLVMCBackendInfo
CBackendTargetInfo.cpp
)
-
-add_llvm_library_dependencies(LLVMCBackendInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
diff --git a/lib/Target/CBackend/TargetInfo/LLVMBuild.txt b/lib/Target/CBackend/TargetInfo/LLVMBuild.txt
index 35752b7..1b47d8e 100644
--- a/lib/Target/CBackend/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/CBackend/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = CBackendInfo
parent = CBackend
required_libraries = MC Support Target
add_to_library_groups = CBackend
-
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
index 60e2189..22d8c76 100644
--- a/lib/Target/CMakeLists.txt
+++ b/lib/Target/CMakeLists.txt
@@ -3,7 +3,6 @@ add_llvm_library(LLVMTarget
Target.cpp
TargetData.cpp
TargetELFWriterInfo.cpp
- TargetFrameLowering.cpp
TargetInstrInfo.cpp
TargetIntrinsicInfo.cpp
TargetLibraryInfo.cpp
@@ -13,12 +12,6 @@ add_llvm_library(LLVMTarget
TargetSubtargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMTarget
- LLVMCore
- LLVMMC
- LLVMSupport
- )
-
foreach(t ${LLVM_TARGETS_TO_BUILD})
message(STATUS "Targeting ${t}")
add_subdirectory(${t})
diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt
index b442a5c..6c67c2d 100644
--- a/lib/Target/CellSPU/CMakeLists.txt
+++ b/lib/Target/CellSPU/CMakeLists.txt
@@ -23,17 +23,5 @@ add_llvm_target(CellSPUCodeGen
SPUNopFiller.cpp
)
-add_llvm_library_dependencies(LLVMCellSPUCodeGen
- LLVMAsmPrinter
- LLVMCellSPUDesc
- LLVMCellSPUInfo
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/CellSPU/LLVMBuild.txt b/lib/Target/CellSPU/LLVMBuild.txt
index 4ae26b2..277620b 100644
--- a/lib/Target/CellSPU/LLVMBuild.txt
+++ b/lib/Target/CellSPU/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = CellSPU
@@ -27,4 +30,3 @@ name = CellSPUCodeGen
parent = CellSPU
required_libraries = AsmPrinter CellSPUDesc CellSPUInfo CodeGen Core MC SelectionDAG Support Target
add_to_library_groups = CellSPU
-
diff --git a/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt b/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
index d41fe93..0027bdb 100644
--- a/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
@@ -3,9 +3,4 @@ add_llvm_library(LLVMCellSPUDesc
SPUMCAsmInfo.cpp
)
-add_llvm_library_dependencies(LLVMCellSPUDesc
- LLVMCellSPUInfo
- LLVMMC
- )
-
add_dependencies(LLVMCellSPUDesc CellSPUCommonTableGen)
diff --git a/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt b/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt
index abc44a2..71e5bbc 100644
--- a/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = CellSPUDesc
parent = CellSPU
required_libraries = CellSPUInfo MC
add_to_library_groups = CellSPU
-
diff --git a/lib/Target/CellSPU/SPUFrameLowering.cpp b/lib/Target/CellSPU/SPUFrameLowering.cpp
index 093f99f..916f9ba 100644
--- a/lib/Target/CellSPU/SPUFrameLowering.cpp
+++ b/lib/Target/CellSPU/SPUFrameLowering.cpp
@@ -47,7 +47,8 @@ bool SPUFrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
return MFI->getStackSize() &&
- (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects());
+ (MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects());
}
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index d58e49b..dc0d5a6 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -296,12 +296,22 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::CTTZ , MVT::i32, Expand);
setOperationAction(ISD::CTTZ , MVT::i64, Expand);
setOperationAction(ISD::CTTZ , MVT::i128, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i128, Expand);
setOperationAction(ISD::CTLZ , MVT::i8, Promote);
setOperationAction(ISD::CTLZ , MVT::i16, Promote);
setOperationAction(ISD::CTLZ , MVT::i32, Legal);
setOperationAction(ISD::CTLZ , MVT::i64, Expand);
setOperationAction(ISD::CTLZ , MVT::i128, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i128, Expand);
// SPU has a version of select that implements (a&~c)|(b&c), just like
// select ought to work:
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index 6940316..1e922a4 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -34,9 +34,10 @@ SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS),
DataLayout(Subtarget.getTargetDataString()),
InstrInfo(*this),
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index 909f12e..0841fee 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -39,7 +39,7 @@ class SPUTargetMachine : public LLVMTargetMachine {
InstrItineraryData InstrItins;
public:
SPUTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
diff --git a/lib/Target/CellSPU/TargetInfo/CMakeLists.txt b/lib/Target/CellSPU/TargetInfo/CMakeLists.txt
index 3f2d6b09..6a98f95 100644
--- a/lib/Target/CellSPU/TargetInfo/CMakeLists.txt
+++ b/lib/Target/CellSPU/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMCellSPUInfo
CellSPUTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMCellSPUInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMCellSPUInfo CellSPUCommonTableGen)
diff --git a/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt b/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt
index 0710cc3..6937e70 100644
--- a/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = CellSPUInfo
parent = CellSPU
required_libraries = MC Support Target
add_to_library_groups = CellSPU
-
diff --git a/lib/Target/CppBackend/CMakeLists.txt b/lib/Target/CppBackend/CMakeLists.txt
index 53f6868..515e1dd 100644
--- a/lib/Target/CppBackend/CMakeLists.txt
+++ b/lib/Target/CppBackend/CMakeLists.txt
@@ -2,11 +2,4 @@ add_llvm_target(CppBackendCodeGen
CPPBackend.cpp
)
-add_llvm_library_dependencies(LLVMCppBackendCodeGen
- LLVMCore
- LLVMCppBackendInfo
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(TargetInfo)
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index a3613b4..92bca6c 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -23,10 +23,10 @@ class formatted_raw_ostream;
struct CPPTargetMachine : public TargetMachine {
CPPTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : TargetMachine(T, TT, CPU, FS) {}
+ : TargetMachine(T, TT, CPU, FS, Options) {}
virtual bool addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &Out,
diff --git a/lib/Target/CppBackend/LLVMBuild.txt b/lib/Target/CppBackend/LLVMBuild.txt
index 77e31c7..122b5e7 100644
--- a/lib/Target/CppBackend/LLVMBuild.txt
+++ b/lib/Target/CppBackend/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = TargetInfo
+
[component_0]
type = TargetGroup
name = CppBackend
@@ -26,4 +29,3 @@ name = CppBackendCodeGen
parent = CppBackend
required_libraries = Core CppBackendInfo Support Target
add_to_library_groups = CppBackend
-
diff --git a/lib/Target/CppBackend/TargetInfo/CMakeLists.txt b/lib/Target/CppBackend/TargetInfo/CMakeLists.txt
index 738b215..f82d72e 100644
--- a/lib/Target/CppBackend/TargetInfo/CMakeLists.txt
+++ b/lib/Target/CppBackend/TargetInfo/CMakeLists.txt
@@ -3,9 +3,3 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
add_llvm_library(LLVMCppBackendInfo
CppBackendTargetInfo.cpp
)
-
-add_llvm_library_dependencies(LLVMCppBackendInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
diff --git a/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt b/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt
index 67a23ba..d4dfc3e 100644
--- a/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = CppBackendInfo
parent = CppBackend
required_libraries = MC Support Target
add_to_library_groups = CppBackend
-
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
new file mode 100644
index 0000000..f8705ee
--- /dev/null
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -0,0 +1,35 @@
+set(LLVM_TARGET_DEFINITIONS Hexagon.td)
+
+tablegen(LLVM HexagonGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM HexagonGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM HexagonGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM HexagonGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM HexagonGenCallingConv.inc -gen-callingconv)
+tablegen(LLVM HexagonGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM HexagonGenIntrinsics.inc -gen-tgt-intrinsic)
+add_public_tablegen_target(HexagonCommonTableGen)
+
+add_llvm_target(HexagonCodeGen
+ HexagonAsmPrinter.cpp
+ HexagonCallingConvLower.cpp
+ HexagonCFGOptimizer.cpp
+ HexagonExpandPredSpillCode.cpp
+ HexagonFrameLowering.cpp
+ HexagonHardwareLoops.cpp
+ HexagonInstrInfo.cpp
+ HexagonISelDAGToDAG.cpp
+ HexagonISelLowering.cpp
+ HexagonMCAsmInfo.cpp
+ HexagonOptimizeSZExtends.cpp
+ HexagonRegisterInfo.cpp
+ HexagonRemoveSZExtArgs.cpp
+ HexagonSelectionDAGInfo.cpp
+ HexagonSplitTFRCondSets.cpp
+ HexagonSubtarget.cpp
+ HexagonTargetMachine.cpp
+ HexagonTargetObjectFile.cpp
+ )
+
+add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
+
diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h
new file mode 100644
index 0000000..a5f2279
--- /dev/null
+++ b/lib/Target/Hexagon/Hexagon.h
@@ -0,0 +1,54 @@
+//=-- Hexagon.h - Top-level interface for Hexagon representation --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Hexagon back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_Hexagon_H
+#define TARGET_Hexagon_H
+
+#include <cassert>
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+ // Forward declarations of the types named in the signatures below.
+ class FunctionPass;
+ class TargetMachine;
+ class HexagonTargetMachine;
+ class raw_ostream;
+
+ // Pass factory declarations. Each returns a FunctionPass pointer; the
+ // definitions are not part of this header.
+ FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM);
+ FunctionPass *createHexagonDelaySlotFillerPass(TargetMachine &TM);
+ FunctionPass *createHexagonFPMoverPass(TargetMachine &TM);
+ FunctionPass *createHexagonRemoveExtendOps(HexagonTargetMachine &TM);
+ FunctionPass *createHexagonCFGOptimizer(HexagonTargetMachine &TM);
+
+ FunctionPass* createHexagonSplitTFRCondSets(HexagonTargetMachine &TM);
+ FunctionPass* createHexagonExpandPredSpillCode(HexagonTargetMachine &TM);
+
+ // These factories take no target machine argument.
+ FunctionPass *createHexagonHardwareLoops();
+ FunctionPass *createHexagonOptimizeSZExtends();
+ FunctionPass *createHexagonFixupHwLoops();
+
+} // end namespace llvm
+
+#define Hexagon_POINTER_SIZE 4
+
+#define Hexagon_PointerSize (Hexagon_POINTER_SIZE)
+#define Hexagon_PointerSize_Bits (Hexagon_POINTER_SIZE * 8)
+#define Hexagon_WordSize Hexagon_PointerSize
+#define Hexagon_WordSize_Bits Hexagon_PointerSize_Bits
+
+// allocframe saves LR and FP on stack before allocating
+// a new stack frame. This takes 8 bytes.
+#define HEXAGON_LRFP_SIZE 8
+
+#endif
diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td
new file mode 100644
index 0000000..72939e6
--- /dev/null
+++ b/lib/Target/Hexagon/Hexagon.td
@@ -0,0 +1,66 @@
+//===- Hexagon.td - Describe the Hexagon Target Machine ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Hexagon Subtarget features.
+//
+
+
+// Hexagon Architectures
+def ArchV2 : SubtargetFeature<"v2", "HexagonArchVersion", "V2",
+ "Hexagon v2">;
+def ArchV3 : SubtargetFeature<"v3", "HexagonArchVersion", "V3",
+ "Hexagon v3">;
+def ArchV4 : SubtargetFeature<"v4", "HexagonArchVersion", "V4",
+ "Hexagon v4">;
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+include "HexagonSchedule.td"
+include "HexagonRegisterInfo.td"
+include "HexagonCallingConv.td"
+include "HexagonInstrInfo.td"
+include "HexagonIntrinsics.td"
+include "HexagonIntrinsicsDerived.td"
+
+
+def HexagonInstrInfo : InstrInfo {
+ // Define how we want to layout our target-specific information field.
+}
+
+//===----------------------------------------------------------------------===//
+// Hexagon processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, ProcessorItineraries Itin,
+ list<SubtargetFeature> Features>
+ : Processor<Name, Itin, Features>;
+
+def : Proc<"hexagonv2", HexagonItineraries, [ArchV2]>;
+def : Proc<"hexagonv3", HexagonItineraries, [ArchV2, ArchV3]>;
+def : Proc<"hexagonv4", HexagonItinerariesV4, [ArchV2, ArchV3, ArchV4]>;
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def Hexagon : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = HexagonInstrInfo;
+}
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
new file mode 100644
index 0000000..8f8e804
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -0,0 +1,555 @@
+//===-- HexagonAsmPrinter.cpp - Print machine instrs to Hexagon assembly ----=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to Hexagon assembly language. This printer is
+// the output mechanism used by `llc'.
+//
+// Documentation at http://developer.apple.com/documentation/DeveloperTools/
+// Reference/Assembler/ASMIntroduction/chapter_1_section_1.html
+//
+//===----------------------------------------------------------------------===//
+
+
+#define DEBUG_TYPE "asm-printer"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool> AlignCalls(
+ "hexagon-align-calls", cl::Hidden, cl::init(true),
+ cl::desc("Insert falign after call instruction for Hexagon target"));
+
+
+namespace {
+  /// HexagonAsmPrinter - AsmPrinter subclass that prints Hexagon machine
+  /// instructions and their operands as assembly text.
+  class HexagonAsmPrinter : public AsmPrinter {
+    const HexagonSubtarget *Subtarget;
+
+  public:
+    explicit HexagonAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+      : AsmPrinter(TM, Streamer) {
+      Subtarget = &TM.getSubtarget<HexagonSubtarget>();
+    }
+
+    virtual const char *getPassName() const {
+      return "Hexagon Assembly Printer";
+    }
+
+    /// printInstruction - This method is automatically generated by tablegen
+    /// from the instruction set description. It prints MI to the stream O.
+    void printInstruction(const MachineInstr *MI, raw_ostream &O);
+    virtual void EmitInstruction(const MachineInstr *MI);
+
+    void printOp(const MachineOperand &MO, raw_ostream &O);
+
+    /// printRegister - Print the name of the physical register held by MO.
+    /// R0AsZero is accepted but not consulted by this implementation.
+    void printRegister(const MachineOperand &MO, bool R0AsZero,
+                       raw_ostream &O) {
+      unsigned RegNo = MO.getReg();
+      assert(TargetRegisterInfo::isPhysicalRegister(RegNo) && "Not physreg??");
+      O << getRegisterName(RegNo);
+    }
+
+    /// printOperand - Print operand OpNo of MI: registers by name, immediates
+    /// as their value, and every other operand kind through printOp().
+    void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &OS) {
+      const MachineOperand &MO = MI->getOperand(OpNo);
+      if (MO.isReg()) {
+        printRegister(MO, false, OS);
+      } else if (MO.isImm()) {
+        OS << MO.getImm();
+      } else {
+        printOp(MO, OS);
+      }
+    }
+
+
+    bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const;
+
+    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                         unsigned AsmVariant, const char *ExtraCode,
+                         raw_ostream &OS);
+    bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+                               unsigned AsmVariant, const char *ExtraCode,
+                               raw_ostream &OS);
+
+
+    /// printHexagonImmOperand - Print the immediate operand OpNo after
+    /// converting it to int.
+    void printHexagonImmOperand(const MachineInstr *MI, unsigned OpNo,
+                                raw_ostream &O) {
+      int value = MI->getOperand(OpNo).getImm();
+      O << value;
+    }
+
+
+    /// printHexagonNegImmOperand - Print the negation of the immediate
+    /// operand OpNo (after conversion to int).
+    void printHexagonNegImmOperand(const MachineInstr *MI, unsigned OpNo,
+                                   raw_ostream &O) {
+      int value = MI->getOperand(OpNo).getImm();
+      O << -value;
+    }
+
+    /// printHexagonMEMriOperand - Print operands OpNo (register) and OpNo+1
+    /// (immediate) as "<reg> + #<imm>".
+    void printHexagonMEMriOperand(const MachineInstr *MI, unsigned OpNo,
+                                  raw_ostream &O) {
+      const MachineOperand &MO1 = MI->getOperand(OpNo);
+      const MachineOperand &MO2 = MI->getOperand(OpNo+1);
+
+      O << getRegisterName(MO1.getReg())
+        << " + #"
+        << (int) MO2.getImm();
+    }
+
+
+    /// printHexagonFrameIndexOperand - Print operands OpNo (register) and
+    /// OpNo+1 (immediate) as "<reg>, #<imm>".
+    void printHexagonFrameIndexOperand(const MachineInstr *MI, unsigned OpNo,
+                                       raw_ostream &O) {
+      const MachineOperand &MO1 = MI->getOperand(OpNo);
+      const MachineOperand &MO2 = MI->getOperand(OpNo+1);
+
+      O << getRegisterName(MO1.getReg())
+        << ", #"
+        << MO2.getImm();
+    }
+
+    void printBranchOperand(const MachineInstr *MI, unsigned OpNo,
+                            raw_ostream &O) {
+      // Branches can take an immediate operand.  This is used by the branch
+      // selection pass to print $+8, an eight byte displacement from the PC.
+      // The immediate is multiplied by 4 before it is printed.
+      if (MI->getOperand(OpNo).isImm()) {
+        O << "$+" << MI->getOperand(OpNo).getImm()*4;
+      } else {
+        printOp(MI->getOperand(OpNo), O);
+      }
+    }
+
+    /// printCallOperand - Currently a no-op: nothing is printed.
+    void printCallOperand(const MachineInstr *MI, unsigned OpNo,
+                          raw_ostream &O) {
+    }
+
+    /// printAbsAddrOperand - Currently a no-op: nothing is printed.
+    void printAbsAddrOperand(const MachineInstr *MI, unsigned OpNo,
+                             raw_ostream &O) {
+    }
+
+
+    /// printSymbolHi - Print operand OpNo wrapped as "#HI(<operand>)".
+    void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+      O << "#HI(";
+      if (MI->getOperand(OpNo).isImm()) {
+        printHexagonImmOperand(MI, OpNo, O);
+      } else {
+        printOp(MI->getOperand(OpNo), O);
+      }
+      O << ")";
+    }
+
+    /// printSymbolLo - Print operand OpNo wrapped as "#LO(<operand>)".
+    void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+      // The low-half printer must emit LO; emitting HI here would make the
+      // output indistinguishable from printSymbolHi.
+      O << "#LO(";
+      if (MI->getOperand(OpNo).isImm()) {
+        printHexagonImmOperand(MI, OpNo, O);
+      } else {
+        printOp(MI->getOperand(OpNo), O);
+      }
+      O << ")";
+    }
+
+    void printPredicateOperand(const MachineInstr *MI, unsigned OpNo,
+                               raw_ostream &O);
+
+    void printAddrModeBasePlusOffset(const MachineInstr *MI, int OpNo,
+                                     raw_ostream &O);
+
+    void printGlobalOperand(const MachineInstr *MI, int OpNo, raw_ostream &O);
+    void printJumpTable(const MachineInstr *MI, int OpNo, raw_ostream &O);
+
+    void EmitAlignment(unsigned NumBits, const GlobalValue *GV = 0) const;
+
+    static const char *getRegisterName(unsigned RegNo);
+  };
+
+} // end of anonymous namespace
+
+// Include the auto-generated portion of the assembly writer.
+#include "HexagonGenAsmWriter.inc"
+
+
+/// EmitAlignment - Emit an alignment directive. When a GlobalValue is supplied
+/// the request is forwarded to AsmPrinter::EmitAlignment; without one a raw
+/// ".falign" directive is emitted instead.
+void HexagonAsmPrinter::EmitAlignment(unsigned NumBits,
+                                      const GlobalValue *GV) const {
+  if (GV) {
+    AsmPrinter::EmitAlignment(NumBits, GV);
+    return;
+  }
+
+  // No global: this is basic block level alignment, so use falign.
+  OutStreamer.EmitRawText(StringRef("\t.falign"));
+}
+
+/// printOp - Print operand MO to O, dispatching on its operand type.
+/// Immediate operands are not supported here: a message is written to dbgs()
+/// and the process aborts.
+void HexagonAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
+  switch (MO.getType()) {
+  case MachineOperand::MO_Immediate:
+    dbgs() << "printOp() does not handle immediate values\n";
+    abort();
+    break;
+
+  case MachineOperand::MO_MachineBasicBlock:
+    O << *MO.getMBB()->getSymbol();
+    break;
+
+  case MachineOperand::MO_JumpTableIndex:
+    // FIXME: PIC relocation model.
+    O << *GetJTISymbol(MO.getIndex());
+    break;
+
+  case MachineOperand::MO_ConstantPoolIndex:
+    O << *GetCPISymbol(MO.getIndex());
+    break;
+
+  case MachineOperand::MO_ExternalSymbol:
+    O << *GetExternalSymbolSymbol(MO.getSymbolName());
+    break;
+
+  case MachineOperand::MO_GlobalAddress:
+    // Computing the address of a global symbol, not calling it.
+    O << *Mang->getSymbol(MO.getGlobal());
+    printOffset(MO.getOffset(), O);
+    break;
+
+  default:
+    O << "<unknown operand type: " << MO.getType() << ">";
+    break;
+  }
+}
+
+
+//
+// isBlockOnlyReachableByFallthrough - Overridden because the default
+// AsmPrinter does not print a label for a basic block that is only reachable
+// by fall through. That is correct except when such a block is also the
+// target of an indirect branch through a jump table: the jump table would
+// then refer to a label that AsmPrinter never defined. A block whose address
+// is taken is therefore never reported as fall-through-only.
+//
+bool HexagonAsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+  return !MBB->hasAddressTaken() &&
+         AsmPrinter::isBlockOnlyReachableByFallthrough(MBB);
+}
+
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+/// ExtraCode, when non-empty, must be a single-letter modifier; 'c', 'L' and
+/// 'I' are handled here. Returns true for an unknown modifier (or an 'L'
+/// modifier whose operands do not qualify) and false once the operand has
+/// been handled.
+///
+bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &OS) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'c': // Don't print "$" before a global var name or constant.
+ // Hexagon never has a prefix.
+ printOperand(MI, OpNo, OS);
+ return false;
+ case 'L': // Write second word of DImode reference.
+ // Verify that this operand has two consecutive registers.
+ if (!MI->getOperand(OpNo).isReg() ||
+ OpNo+1 == MI->getNumOperands() ||
+ !MI->getOperand(OpNo+1).isReg())
+ return true;
+ ++OpNo; // Return the high-part.
+ // Leave the switch so the shared printOperand() call below prints OpNo+1.
+ break;
+ case 'I':
+ // Write 'i' if an integer constant, otherwise nothing. Used to print
+ // addi vs add, etc.
+ if (MI->getOperand(OpNo).isImm())
+ OS << "i";
+ return false;
+ }
+ }
+
+ // No modifier, or the 'L' modifier after advancing OpNo.
+ printOperand(MI, OpNo, OS);
+ return false;
+}
+
+/// PrintAsmMemoryOperand - Print the base operand at OpNo, followed by
+/// " + #<imm>" when the immediate operand at OpNo+1 is non-zero. Returns true
+/// if any modifier is supplied in ExtraCode and false otherwise. Non-register
+/// bases and non-immediate offsets are not implemented and only trip
+/// assert(0 && "Unimplemented").
+bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ // NOTE(review): OpNo+1 is read without checking MI->getNumOperands() --
+ // confirm callers always supply a base/offset operand pair.
+ const MachineOperand &Base = MI->getOperand(OpNo);
+ const MachineOperand &Offset = MI->getOperand(OpNo+1);
+
+ if (Base.isReg())
+ printOperand(MI, OpNo, O);
+ else
+ assert(0 && "Unimplemented");
+
+ if (Offset.isImm()) {
+ // A zero offset is omitted from the output entirely.
+ if (Offset.getImm())
+ O << " + #" << Offset.getImm();
+ }
+ else
+ assert(0 && "Unimplemented");
+
+ return false;
+}
+
+/// printPredicateOperand - Not implemented: the body only asserts and prints
+/// nothing to O.
+void HexagonAsmPrinter::printPredicateOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ raw_ostream &O) {
+ assert(0 && "Unimplemented");
+}
+
+
+/// EmitInstruction - Print out a single Hexagon MI to the current output
+/// stream. The text is first assembled in a local buffer, including the
+/// packet braces reported by HexagonMachineFunctionInfo, and then handed to
+/// OutStreamer as one raw text chunk.
+///
+void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream O(Str);
+
+ const MachineFunction* MF = MI->getParent()->getParent();
+ const HexagonMachineFunctionInfo* MFI =
+ (const HexagonMachineFunctionInfo*)
+ MF->getInfo<HexagonMachineFunctionInfo>();
+
+
+
+ // Print a brace for the beginning of the packet.
+ if (MFI->isStartPacket(MI)) {
+ O << "\t{" << '\n';
+ }
+
+ DEBUG( O << "// MI = " << *MI << '\n';);
+
+ // Indent
+ O << "\t";
+
+
+ // The opcodes tested below are printed by hand; every other opcode goes
+ // through the tablegen-generated printInstruction() in the final else.
+ if (MI->getOpcode() == Hexagon::ENDLOOP0) {
+ // ENDLOOP0 prints the packet's closing brace itself, before the
+ // instruction text; the generic end-of-packet brace below skips it.
+ if (MFI->isEndPacket(MI) && MFI->isStartPacket(MI)) {
+ O << "\t{ nop }";
+ } else {
+ O << "}";
+ }
+ printInstruction(MI, O);
+ } else if (MI->getOpcode() == Hexagon::STriwt) {
+ //
+ // Handle truncated store on Hexagon.
+ //
+ O << "\tmemw(";
+ printHexagonMEMriOperand(MI, 0, O);
+
+ O << ") = ";
+ // Print the subreg_loreg sub-register of the register in operand 2.
+ unsigned SubRegNum =
+ TM.getRegisterInfo()->getSubReg(MI->getOperand(2)
+ .getReg(), Hexagon::subreg_loreg);
+ const char *SubRegName = getRegisterName(SubRegNum);
+ O << SubRegName << '\n';
+ } else if (MI->getOpcode() == Hexagon::MPYI_rin) {
+ // Handle multiply with a negative constant on Hexagon:
+ // "$dst =- mpyi($src1, #$src2)"
+ printOperand(MI, 0, O);
+ O << " =- mpyi(";
+ printOperand(MI, 1, O);
+ O << ", #";
+ printHexagonNegImmOperand(MI, 2, O);
+ O << ")";
+ } else if (MI->getOpcode() == Hexagon::MEMw_ADDSUBi_indexed_MEM_V4) {
+ //
+ // Handle memw(Rs+u6:2) [+-]= #U5
+ //
+ O << "\tmemw("; printHexagonMEMriOperand(MI, 0, O); O << ") ";
+ // A negative addend is printed as "-= #<magnitude>".
+ int addend = MI->getOperand(2).getImm();
+ if (addend < 0)
+ O << "-= " << "#" << -addend << '\n';
+ else
+ O << "+= " << "#" << addend << '\n';
+ } else if (MI->getOpcode() == Hexagon::MEMw_ADDSUBi_MEM_V4) {
+ //
+ // Handle memw(Rs+u6:2) [+-]= #U5
+ //
+ O << "\tmemw("; printHexagonMEMriOperand(MI, 0, O); O << ") ";
+ int addend = MI->getOperand(2).getImm();
+ if (addend < 0)
+ O << "-= " << "#" << -addend << '\n';
+ else
+ O << "+= " << "#" << addend << '\n';
+ } else if (MI->getOpcode() == Hexagon::MEMh_ADDSUBi_indexed_MEM_V4) {
+ //
+ // Handle memh(Rs+u6:1) [+-]= #U5
+ //
+ O << "\tmemh("; printHexagonMEMriOperand(MI, 0, O); O << ") ";
+ int addend = MI->getOperand(2).getImm();
+ if (addend < 0)
+ O << "-= " << "#" << -addend << '\n';
+ else
+ O << "+= " << "#" << addend << '\n';
+ } else if (MI->getOpcode() == Hexagon::MEMh_ADDSUBi_MEM_V4) {
+ //
+ // Handle memh(Rs+u6:1) [+-]= #U5
+ //
+ O << "\tmemh("; printHexagonMEMriOperand(MI, 0, O); O << ") ";
+ int addend = MI->getOperand(2).getImm();
+ if (addend < 0)
+ O << "-= " << "#" << -addend << '\n';
+ else
+ O << "+= " << "#" << addend << '\n';
+ } else if (MI->getOpcode() == Hexagon::MEMb_ADDSUBi_indexed_MEM_V4) {
+ //
+ // Handle memb(Rs+u6:1) [+-]= #U5
+ //
+ O << "\tmemb("; printHexagonMEMriOperand(MI, 0, O); O << ") ";
+ int addend = MI->getOperand(2).getImm();
+ if (addend < 0)
+ O << "-= " << "#" << -addend << '\n';
+ else
+ O << "+= " << "#" << addend << '\n';
+ } else if (MI->getOpcode() == Hexagon::MEMb_ADDSUBi_MEM_V4) {
+ //
+ // Handle memb(Rs+u6:1) [+-]= #U5
+ //
+ O << "\tmemb("; printHexagonMEMriOperand(MI, 0, O); O << ") ";
+ int addend = MI->getOperand(2).getImm();
+ if (addend < 0)
+ O << "-= " << "#" << -addend << '\n';
+ else
+ O << "+= " << "#" << addend << '\n';
+ } else if (MI->getOpcode() == Hexagon::CMPbGTri_V4) {
+ //
+ // Handle Pd=cmpb.gt(Rs,#s8)
+ //
+ O << "\t";
+ printRegister(MI->getOperand(0), false, O);
+ O << " = cmpb.gt(";
+ printRegister(MI->getOperand(1), false, O);
+ O << ", ";
+ // The stored immediate is shifted right by 24 bits before printing.
+ int val = MI->getOperand(2).getImm() >> 24;
+ O << "#" << val << ")" << '\n';
+ } else if (MI->getOpcode() == Hexagon::CMPhEQri_V4) {
+ //
+ // Handle Pd=cmph.eq(Rs,#8)
+ //
+ O << "\t";
+ printRegister(MI->getOperand(0), false, O);
+ O << " = cmph.eq(";
+ printRegister(MI->getOperand(1), false, O);
+ O << ", ";
+ int val = MI->getOperand(2).getImm();
+ assert((((0 <= val) && (val <= 127)) ||
+ ((65408 <= val) && (val <= 65535))) &&
+ "Not in correct range!");
+ // Values in [65408, 65535] are printed as the negative numbers
+ // [-128, -1] by subtracting 65536.
+ if (val >= 65408) val -= 65536;
+ O << "#" << val << ")" << '\n';
+ } else if (MI->getOpcode() == Hexagon::CMPhGTri_V4) {
+ //
+ // Handle Pd=cmph.gt(Rs,#8)
+ //
+ O << "\t";
+ printRegister(MI->getOperand(0), false, O);
+ O << " = cmph.gt(";
+ printRegister(MI->getOperand(1), false, O);
+ O << ", ";
+ // The stored immediate is shifted right by 16 bits before printing.
+ int val = MI->getOperand(2).getImm() >> 16;
+ O << "#" << val << ")" << '\n';
+ } else {
+ printInstruction(MI, O);
+ }
+
+ // Print a brace for the end of the packet.
+ if (MFI->isEndPacket(MI) && MI->getOpcode() != Hexagon::ENDLOOP0) {
+ O << "\n\t}" << '\n';
+ }
+
+ // Controlled by the -hexagon-align-calls option: follow calls with .falign.
+ if (AlignCalls && MI->getDesc().isCall()) {
+ O << "\n\t.falign" << "\n";
+ }
+
+ // Hand the buffered text to the streamer in one piece.
+ OutStreamer.EmitRawText(O.str());
+ return;
+}
+
+/// PrintUnmangledNameSafely - Print out the printable characters in the name.
+/// Don't print things like \n or \0.
+// static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) {
+// for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen();
+// Name != E; ++Name)
+// if (isprint(*Name))
+// OS << *Name;
+// }
+
+
+/// printAddrModeBasePlusOffset - Print the register operand at OpNo and the
+/// immediate operand at OpNo+1 as "<reg> + #<imm>".
+void HexagonAsmPrinter::printAddrModeBasePlusOffset(const MachineInstr *MI,
+                                                    int OpNo, raw_ostream &O) {
+  const MachineOperand &Base = MI->getOperand(OpNo);
+  const MachineOperand &Offset = MI->getOperand(OpNo+1);
+
+  O << getRegisterName(Base.getReg());
+  O << " + #";
+  O << Offset.getImm();
+}
+
+
+/// printGlobalOperand - Print the symbol of the global address operand at
+/// OpNo, followed by " + <offset>" when the operand carries a non-zero offset.
+void HexagonAsmPrinter::printGlobalOperand(const MachineInstr *MI, int OpNo,
+                                           raw_ostream &O) {
+  const MachineOperand &GlobalOp = MI->getOperand(OpNo);
+  assert( (GlobalOp.getType() == MachineOperand::MO_GlobalAddress) &&
+          "Expecting global address");
+
+  O << *Mang->getSymbol(GlobalOp.getGlobal());
+
+  // Nothing more to print for a zero offset.
+  if (GlobalOp.getOffset() == 0)
+    return;
+
+  O << " + ";
+  O << GlobalOp.getOffset();
+}
+
+/// printJumpTable - Print the symbol for the jump table index operand at OpNo.
+void HexagonAsmPrinter::printJumpTable(const MachineInstr *MI, int OpNo,
+                                       raw_ostream &O) {
+  const MachineOperand &TableOp = MI->getOperand(OpNo);
+  assert( (TableOp.getType() == MachineOperand::MO_JumpTableIndex) &&
+          "Expecting jump table index");
+
+  // Hexagon_TODO: Do we need name mangling?
+  const unsigned TableIndex = TableOp.getIndex();
+  O << *GetJTISymbol(TableIndex);
+}
+
+/// LLVMInitializeHexagonAsmPrinter - C-linkage entry point that constructs a
+/// RegisterAsmPrinter<HexagonAsmPrinter> for TheHexagonTarget.
+extern "C" void LLVMInitializeHexagonAsmPrinter() {
+ RegisterAsmPrinter<HexagonAsmPrinter> X(TheHexagonTarget);
+}
diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
new file mode 100644
index 0000000..38000e7
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -0,0 +1,240 @@
+//===---- HexagonCFGOptimizer.cpp - CFG optimizations ---------------------===//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#define DEBUG_TYPE "hexagon_cfg"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
+#include "HexagonMachineFunctionInfo.h"
+#include <iostream>
+
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+namespace {
+
+/// Machine-function pass that rewrites conditional-branch / unconditional-jump
+/// block pairs; see runOnMachineFunction for the handled patterns.
+class HexagonCFGOptimizer : public MachineFunctionPass {
+  HexagonTargetMachine &QTM;   // Target machine supplied at construction.
+  const HexagonSubtarget &QST; // Subtarget obtained from QTM.
+
+  /// Invert a conditional branch and point it at a new target block.
+  void InvertAndChangeJumpTarget(MachineInstr*, MachineBasicBlock*);
+
+public:
+  static char ID;
+
+  HexagonCFGOptimizer(HexagonTargetMachine &TM)
+    : MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {}
+
+  const char *getPassName() const { return "Hexagon CFG Optimizer"; }
+
+  bool runOnMachineFunction(MachineFunction &Fn);
+};
+
+
+char HexagonCFGOptimizer::ID = 0; // Storage for the ID handed to MachineFunctionPass.
+
+static bool IsConditionalBranch(int Opc) { // Any predicated jump opcode?
+  const bool Plain = Opc == Hexagon::JMP_Pred || Opc == Hexagon::JMP_PredNot;
+  return Plain || Opc == Hexagon::JMP_PredPt || Opc == Hexagon::JMP_PredNotPt;
+}
+
+
+static bool IsUnconditionalJump(int Opc) { // Plain, unpredicated JMP?
+  return Opc == Hexagon::JMP;
+}
+
+
+/// Flip the sense of the predicated jump MI (Pred <-> PredNot, PredPt <->
+/// PredNotPt) and redirect its block operand (operand 1) to NewTarget.
+/// Any other opcode trips the assertion below.
+void
+HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI,
+                                               MachineBasicBlock* NewTarget) {
+  const int OldOpcode = MI->getOpcode();
+  int InvertedOpcode = 0;
+
+  // Map each predicated jump onto its opposite-sense counterpart.
+  if (OldOpcode == Hexagon::JMP_Pred) {
+    InvertedOpcode = Hexagon::JMP_PredNot;
+  } else if (OldOpcode == Hexagon::JMP_PredNot) {
+    InvertedOpcode = Hexagon::JMP_Pred;
+  } else if (OldOpcode == Hexagon::JMP_PredPt) {
+    InvertedOpcode = Hexagon::JMP_PredNotPt;
+  } else if (OldOpcode == Hexagon::JMP_PredNotPt) {
+    InvertedOpcode = Hexagon::JMP_PredPt;
+  } else {
+    // Callers are expected to pass only the four opcodes above.
+    // With assertions compiled out, InvertedOpcode simply stays 0.
+    assert(0 && "Cannot handle this case");
+  }
+
+  // Commit the new opcode first, then the new destination block.
+  const HexagonInstrInfo *QII = QTM.getInstrInfo();
+  MI->setDesc(QII->get(InvertedOpcode));
+  MI->getOperand(1).setMBB(NewTarget);
+}
+
+
+/// Fold "conditional branch over a jump-only block" patterns (cases 1 and 2
+/// below) for every basic block of Fn.  Always returns true.
+bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
+  // Loop over all of the basic blocks.
+  for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
+       MBBb != MBBe; ++MBBb) {
+    MachineBasicBlock* MBB = MBBb;
+    // Only the first terminator of the block is examined.
+    MachineBasicBlock::iterator MII = MBB->getFirstTerminator();
+    if (MII != MBB->end()) {
+      MachineInstr *MI = MII;
+      int Opc = MI->getOpcode();
+      if (IsConditionalBranch(Opc)) {
+        //
+        // (Case 1) Transform the code if the following condition occurs:
+        //   BB1: if (p0) jump BB3
+        //   ...falls-through to BB2 ...
+        //   BB2: jump BB4
+        //   ...next block in layout is BB3...
+        //   BB3: ...
+        //
+        // Transform this to:
+        //   BB1: if (!p0) jump BB4
+        //   Remove BB2
+        //   BB3: ...
+        //
+        // (Case 2) A variation occurs when BB3 contains a JMP to BB4:
+        //   BB1: if (p0) jump BB3
+        //   ...falls-through to BB2 ...
+        //   BB2: jump BB4
+        //   ...other basic blocks ...
+        //   BB4:
+        //   ...not a fall-thru
+        //   BB3: ...
+        //     jump BB4
+        //
+        // Transform this to:
+        //   BB1: if (!p0) jump BB4
+        //   Remove BB2
+        //   BB3: ...
+        //   BB4: ...
+        //
+        unsigned NumSuccs = MBB->succ_size();
+        // Both patterns need exactly two successors; bail out before reading
+        // the successor list, where *(++SI) would otherwise hit succ_end().
+        if (NumSuccs != 2)
+          continue;
+        MachineBasicBlock::succ_iterator SI = MBB->succ_begin();
+        MachineBasicBlock* FirstSucc = *SI;
+        MachineBasicBlock* SecondSucc = *(++SI);
+        MachineBasicBlock* LayoutSucc = NULL;
+        MachineBasicBlock* JumpAroundTarget = NULL;
+
+        if (MBB->isLayoutSuccessor(FirstSucc)) {
+          LayoutSucc = FirstSucc;
+          JumpAroundTarget = SecondSucc;
+        } else if (MBB->isLayoutSuccessor(SecondSucc)) {
+          LayoutSucc = SecondSucc;
+          JumpAroundTarget = FirstSucc;
+        }
+        // Otherwise neither successor follows MBB in layout: odd case, skipped.
+
+        // The target of the unconditional branch must be JumpAroundTarget.
+        // TODO: If not, we should not invert the unconditional branch.
+        MachineBasicBlock* CondBranchTarget = NULL;
+        if ((MI->getOpcode() == Hexagon::JMP_Pred) ||
+            (MI->getOpcode() == Hexagon::JMP_PredNot)) {
+          CondBranchTarget = MI->getOperand(1).getMBB();
+        }
+
+        if (!LayoutSucc || (CondBranchTarget != JumpAroundTarget)) {
+          continue;
+        }
+        if (LayoutSucc->pred_size() == 1) {
+          // Ensure that BB2 has one instruction -- an unconditional jump.
+          if ((LayoutSucc->size() == 1) &&
+              IsUnconditionalJump(LayoutSucc->front().getOpcode())) {
+            MachineBasicBlock* UncondTarget =
+              LayoutSucc->front().getOperand(0).getMBB();
+            // Check if the layout successor of BB2 is BB3.
+            bool case1 = LayoutSucc->isLayoutSuccessor(JumpAroundTarget);
+            bool case2 = JumpAroundTarget->isSuccessor(UncondTarget) &&
+              JumpAroundTarget->size() >= 1 &&
+              IsUnconditionalJump(JumpAroundTarget->back().getOpcode()) &&
+              JumpAroundTarget->pred_size() == 1 &&
+              JumpAroundTarget->succ_size() == 1;
+
+            if (case1 || case2) {
+              InvertAndChangeJumpTarget(MI, UncondTarget);
+              MBB->removeSuccessor(JumpAroundTarget);
+              MBB->addSuccessor(UncondTarget);
+
+              // Remove the unconditional branch in LayoutSucc.
+              LayoutSucc->erase(LayoutSucc->begin());
+              LayoutSucc->removeSuccessor(UncondTarget);
+              LayoutSucc->addSuccessor(JumpAroundTarget);
+
+              // This code performs the conversion for case 2, which moves
+              // the block to the fall-thru case (BB3 in the code above).
+              if (case2 && !case1) {
+                JumpAroundTarget->moveAfter(LayoutSucc);
+                // only move a block if it doesn't have a fall-thru. otherwise
+                // the CFG will be incorrect.
+                if (!UncondTarget->canFallThrough()) {
+                  UncondTarget->moveAfter(JumpAroundTarget);
+                }
+              }
+
+              //
+              // Correct live-in information. Is used by post-RA scheduler
+              // The live-in to LayoutSucc is now all values live-in to
+              // JumpAroundTarget.
+              //
+              std::vector<unsigned> OrigLiveIn(LayoutSucc->livein_begin(),
+                                               LayoutSucc->livein_end());
+              std::vector<unsigned> NewLiveIn(JumpAroundTarget->livein_begin(),
+                                              JumpAroundTarget->livein_end());
+              for (unsigned i = 0; i < OrigLiveIn.size(); ++i) {
+                LayoutSucc->removeLiveIn(OrigLiveIn[i]);
+              }
+              for (unsigned i = 0; i < NewLiveIn.size(); ++i) {
+                LayoutSucc->addLiveIn(NewLiveIn[i]);
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  return true;
+}
+}
+
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonCFGOptimizer(HexagonTargetMachine &TM) {
+ return new HexagonCFGOptimizer(TM); // Heap-allocated; returned to the caller.
+}
diff --git a/lib/Target/Hexagon/HexagonCallingConv.td b/lib/Target/Hexagon/HexagonCallingConv.td
new file mode 100644
index 0000000..bd9608b
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonCallingConv.td
@@ -0,0 +1,35 @@
+//===- HexagonCallingConv.td - Calling Conventions Hexagon -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the Hexagon architectures.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// Hexagon 32-bit C return-value convention (i32 in R0-R5, i64 in D0-D2).
+def RetCC_Hexagon32 : CallingConv<[
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>,
+ CCIfType<[i64], CCAssignToReg<[D0, D1, D2]>>,
+
+ // Otherwise the value is assigned to the stack in 4-byte aligned units.
+ CCAssignToStack<4, 4>
+]>;
+
+// Hexagon 32-bit C calling convention for arguments.
+def CC_Hexagon32 : CallingConv<[
+ // i32/i16/i8 arguments use R0-R5 and i64 arguments use D0-D2 if there is space.
+ CCIfType<[i32, i16, i8], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>,
+ CCIfType<[i64], CCAssignToReg<[D0, D1, D2]>>,
+
+ // Otherwise the argument is assigned to the stack in 4-byte aligned units.
+ CCAssignToStack<4, 4>
+]>;
diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
new file mode 100644
index 0000000..2e51dbf
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
@@ -0,0 +1,207 @@
+//===-- HexagonCallingConvLower.cpp - Calling Convention lowering ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Hexagon_CCState class, used for lowering and
+// implementing calling conventions. Adapted from the machine independent
+// version of the class (CCState) but this handles calls to varargs functions
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonCallingConvLower.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "Hexagon.h"
+using namespace llvm;
+
+// Build an empty state: no stack bytes consumed yet, and a UsedRegs bitmap
+// sized to hold one bit per target register, packed 32 to a word.
+Hexagon_CCState::Hexagon_CCState(CallingConv::ID CC, bool isVarArg,
+                                 const TargetMachine &tm,
+                                 SmallVector<CCValAssign, 16> &locs,
+                                 LLVMContext &c)
+  : CallingConv(CC), IsVarArg(isVarArg), TM(tm),
+    TRI(*TM.getRegisterInfo()), Locs(locs), Context(c), StackOffset(0) {
+  const unsigned NumWords = (TRI.getNumRegs() + 31) / 32;
+  UsedRegs.resize(NumWords);
+}
+
+// HandleByVal - Allocate a stack slot large enough to pass an argument by
+// value. The size and alignment information of the argument is encoded in its
+// parameter attribute.
+void Hexagon_CCState::HandleByVal(unsigned ValNo, EVT ValVT,
+                                  EVT LocVT, CCValAssign::LocInfo LocInfo,
+                                  int MinSize, int MinAlign,
+                                  ISD::ArgFlagsTy ArgFlags) {
+  // Start from the byval attributes, then raise each to the caller's minimum.
+  unsigned SlotSize = ArgFlags.getByValSize();
+  unsigned SlotAlign = ArgFlags.getByValAlign();
+  if ((int)SlotSize < MinSize)
+    SlotSize = MinSize;
+  if ((int)SlotAlign < MinAlign)
+    SlotAlign = MinAlign;
+  const unsigned SlotOffset = AllocateStack(SlotSize, SlotAlign);
+  addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), SlotOffset,
+                             LocVT.getSimpleVT(), LocInfo));
+}
+
+/// MarkAllocated - Mark a register and all of its aliases as allocated.
+void Hexagon_CCState::MarkAllocated(unsigned Reg) {
+  UsedRegs[Reg/32] |= 1U << (Reg&31); // 1U: bit 31 must not hit int's sign bit.
+  // The alias list ends at a zero entry; mark every alias as allocated too.
+  if (const unsigned *RegAliases = TRI.getAliasSet(Reg))
+    for (; (Reg = *RegAliases); ++RegAliases)
+      UsedRegs[Reg/32] |= 1U << (Reg&31);
+}
+
+/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node,
+/// incorporating info about the formals into this state.
+void
+Hexagon_CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg>
+                                        &Ins,
+                                        Hexagon_CCAssignFn Fn,
+                                        unsigned SretValueInRegs) {
+  unsigned NumArgs = Ins.size();
+  unsigned i = 0;
+
+  // If the function returns a small struct in registers, skip
+  // over the first (dummy) argument.
+  if (SretValueInRegs != 0) {
+    ++i;
+  }
+  // Use '<' rather than '!=': when Ins is empty the skip above leaves i at 1,
+  // and a '!=' test would walk past the end of Ins instead of terminating.
+  for (; i < NumArgs; ++i) {
+    EVT ArgVT = Ins[i].VT;
+    ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
+    if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, 0, 0, false)) {
+      dbgs() << "Formal argument #" << i << " has unhandled type "
+             << ArgVT.getEVTString() << "\n";
+      abort();
+    }
+  }
+}
+
+/// AnalyzeReturn - Analyze the returned values of an ISD::RET node,
+/// incorporating info about the result values into this state.
+void
+Hexagon_CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+                               Hexagon_CCAssignFn Fn,
+                               unsigned SretValueInRegs) {
+  // For Hexagon, small structures are returned in registers: sizes up to 32
+  // go in R0 as an i32 and sizes up to 64 go in D0 as an i64.  A size of
+  // zero, or anything larger than 64, takes the generic path below.
+  const bool FitsR0 = SretValueInRegs != 0 && SretValueInRegs <= 32;
+  const bool FitsD0 = SretValueInRegs > 32 && SretValueInRegs <= 64;
+
+  if (FitsR0) {
+    addLoc(CCValAssign::getReg(0, MVT::i32, Hexagon::R0, MVT::i32,
+                               CCValAssign::Full));
+    return;
+  }
+  if (FitsD0) {
+    addLoc(CCValAssign::getReg(0, MVT::i64, Hexagon::D0, MVT::i64,
+                               CCValAssign::Full));
+    return;
+  }
+
+  // Ask Fn to place each returned value; an unhandled type is fatal.
+  const unsigned NumOuts = Outs.size();
+  for (unsigned Idx = 0; Idx != NumOuts; ++Idx) {
+    EVT VT = Outs[Idx].VT;
+    ISD::ArgFlagsTy ArgFlags = Outs[Idx].Flags;
+    if (!Fn(Idx, VT, VT, CCValAssign::Full, ArgFlags, *this, -1, -1, false))
+      continue;
+    dbgs() << "Return operand #" << Idx << " has unhandled type "
+           << VT.getEVTString() << "\n";
+    abort();
+  }
+}
+
+
+/// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info
+/// about the passed values into this state.
+void
+Hexagon_CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg>
+                                     &Outs,
+                                     Hexagon_CCAssignFn Fn,
+                                     int NonVarArgsParams,
+                                     unsigned SretValueSize) {
+  unsigned NumOps = Outs.size();
+  unsigned i = 0;
+  // If the called function returns a small struct in registers, skip
+  // the first actual parameter. We do not want to pass a pointer to
+  // the stack location.
+  if (SretValueSize != 0) {
+    ++i;
+  }
+  // Use '<' rather than '!=': when Outs is empty the skip above leaves i at
+  // 1, and a '!=' test would walk past the end of Outs.
+  for (; i < NumOps; ++i) {
+    EVT ArgVT = Outs[i].VT;
+    ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+    if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this,
+           NonVarArgsParams, i+1, false)) {
+      dbgs() << "Call operand #" << i << " has unhandled type "
+             << ArgVT.getEVTString() << "\n";
+      abort();
+    }
+  }
+}
+
+/// AnalyzeCallOperands - Same as above except it takes vectors of types
+/// and argument flags.
+void
+Hexagon_CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
+                                     SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+                                     Hexagon_CCAssignFn Fn) {
+  // Walk the parallel type/flag vectors; any unhandled type aborts.
+  for (unsigned Idx = 0, NumOps = ArgVTs.size(); Idx != NumOps; ++Idx) {
+    EVT ArgVT = ArgVTs[Idx];
+    ISD::ArgFlagsTy ArgFlags = Flags[Idx];
+    if (!Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, -1, -1,
+            false))
+      continue;
+    dbgs() << "Call operand #" << Idx << " has unhandled type "
+           << ArgVT.getEVTString() << "\n";
+    abort();
+  }
+}
+
+/// AnalyzeCallResult - Analyze the return values of an ISD::CALL node,
+/// incorporating info about the passed values into this state.
+void
+Hexagon_CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+                                   Hexagon_CCAssignFn Fn,
+                                   unsigned SretValueInRegs) {
+  // SretValueInRegs is unused; results get default-constructed flags.
+  const ISD::ArgFlagsTy NoFlags = ISD::ArgFlagsTy();
+  for (unsigned Idx = 0, NumIns = Ins.size(); Idx != NumIns; ++Idx) {
+    EVT VT = Ins[Idx].VT;
+    if (!Fn(Idx, VT, VT, CCValAssign::Full, NoFlags, *this, -1, -1, false))
+      continue;
+    dbgs() << "Call result #" << Idx << " has unhandled type "
+           << VT.getEVTString() << "\n";
+    abort();
+  }
+}
+
+/// AnalyzeCallResult - Same as above except it's specialized for calls which
+/// produce a single value.
+void Hexagon_CCState::AnalyzeCallResult(EVT VT, Hexagon_CCAssignFn Fn) {
+  const bool Unhandled = Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(),
+                            *this, -1, -1, false);
+  if (!Unhandled)
+    return; // Fn accepted value number 0.
+  dbgs() << "Call result has unhandled type " << VT.getEVTString() << "\n";
+  abort();
+}
diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.h b/lib/Target/Hexagon/HexagonCallingConvLower.h
new file mode 100644
index 0000000..1f601e8
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonCallingConvLower.h
@@ -0,0 +1,189 @@
+//===-- HexagonCallingConvLower.h - Calling Conventions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Hexagon_CCState class, used for lowering
+// and implementing calling conventions. Adapted from the target independent
+// version but this handles calls to varargs functions
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_Hexagon_CODEGEN_CALLINGCONVLOWER_H
+#define LLVM_Hexagon_CODEGEN_CALLINGCONVLOWER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+
+//
+// Need to handle varargs.
+//
+namespace llvm {
+ class TargetRegisterInfo;
+ class TargetMachine;
+ class Hexagon_CCState;
+ class SDNode;
+
+
+/// Hexagon_CCAssignFn - This function assigns a location for Val, updating
+/// State to reflect the change. A true result means the type was not handled.
+typedef bool Hexagon_CCAssignFn(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, Hexagon_CCState &State,
+ int NonVarArgsParams,
+ int CurrentParam,
+ bool ForceMem);
+
+
+/// Hexagon_CCState - This class holds information needed while lowering
+/// arguments and return values.  It captures which registers are already
+/// assigned and which stack slots are used, with accessors to allocate both.
+class Hexagon_CCState {
+  CallingConv::ID CallingConv;
+  bool IsVarArg;
+  const TargetMachine &TM;
+  const TargetRegisterInfo &TRI;
+  SmallVector<CCValAssign, 16> &Locs; // Caller-owned list that addLoc appends to.
+  LLVMContext &Context;
+
+  unsigned StackOffset;               // Next free stack offset.
+  SmallVector<uint32_t, 16> UsedRegs; // Bitmap: bit (Reg&31) of word Reg/32.
+public:
+  Hexagon_CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &TM,
+                  SmallVector<CCValAssign, 16> &locs, LLVMContext &c);
+
+  void addLoc(const CCValAssign &V) {
+    Locs.push_back(V);
+  }
+
+  LLVMContext &getContext() const { return Context; }
+  const TargetMachine &getTarget() const { return TM; }
+  unsigned getCallingConv() const { return CallingConv; }
+  bool isVarArg() const { return IsVarArg; }
+
+  unsigned getNextStackOffset() const { return StackOffset; }
+
+  /// isAllocated - Return true if the specified register (or an alias) is
+  /// allocated.  The mask uses 1U so that bit 31 is not a signed-int shift.
+  bool isAllocated(unsigned Reg) const {
+    return UsedRegs[Reg/32] & (1U << (Reg&31));
+  }
+
+  /// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node,
+  /// incorporating info about the formals into this state.
+  void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+                              Hexagon_CCAssignFn Fn, unsigned SretValueInRegs);
+
+  /// AnalyzeReturn - Analyze the returned values of an ISD::RET node,
+  /// incorporating info about the result values into this state.
+  void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+                     Hexagon_CCAssignFn Fn, unsigned SretValueInRegs);
+
+  /// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info
+  /// about the passed values into this state.
+  void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+                           Hexagon_CCAssignFn Fn, int NonVarArgsParams,
+                           unsigned SretValueSize);
+
+  /// AnalyzeCallOperands - Same as above except it takes vectors of types
+  /// and argument flags.
+  void AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
+                           SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+                           Hexagon_CCAssignFn Fn);
+
+  /// AnalyzeCallResult - Analyze the return values of an ISD::CALL node,
+  /// incorporating info about the passed values into this state.
+  void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+                         Hexagon_CCAssignFn Fn, unsigned SretValueInRegs);
+
+  /// AnalyzeCallResult - Same as above except it's specialized for calls which
+  /// produce a single value.
+  void AnalyzeCallResult(EVT VT, Hexagon_CCAssignFn Fn);
+
+  /// getFirstUnallocated - Return the index in Regs of the first unallocated
+  /// register, or NumRegs if they are all allocated.
+  unsigned getFirstUnallocated(const unsigned *Regs, unsigned NumRegs) const {
+    for (unsigned i = 0; i != NumRegs; ++i)
+      if (!isAllocated(Regs[i]))
+        return i;
+    return NumRegs;
+  }
+
+  /// AllocateReg - Attempt to allocate one register.  If it is not available,
+  /// return zero.  Otherwise, return the register, marking it and any aliases
+  /// as allocated.
+  unsigned AllocateReg(unsigned Reg) {
+    if (isAllocated(Reg)) return 0;
+    MarkAllocated(Reg);
+    return Reg;
+  }
+
+  /// Version of AllocateReg with extra register to be shadowed.
+  unsigned AllocateReg(unsigned Reg, unsigned ShadowReg) {
+    if (isAllocated(Reg)) return 0;
+    MarkAllocated(Reg);
+    MarkAllocated(ShadowReg);
+    return Reg;
+  }
+
+  /// AllocateReg - Attempt to allocate one of the specified registers.  If none
+  /// are available, return zero.  Otherwise, return the first one available,
+  /// marking it and any aliases as allocated.
+  unsigned AllocateReg(const unsigned *Regs, unsigned NumRegs) {
+    unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs);
+    if (FirstUnalloc == NumRegs)
+      return 0;    // Didn't find the reg.
+
+    // Mark the register and any aliases as allocated.
+    unsigned Reg = Regs[FirstUnalloc];
+    MarkAllocated(Reg);
+    return Reg;
+  }
+
+  /// Version of AllocateReg with list of registers to be shadowed.
+  unsigned AllocateReg(const unsigned *Regs, const unsigned *ShadowRegs,
+                       unsigned NumRegs) {
+    unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs);
+    if (FirstUnalloc == NumRegs)
+      return 0;    // Didn't find the reg.
+
+    // Mark the register, its shadow, and any aliases as allocated.
+    unsigned Reg = Regs[FirstUnalloc], ShadowReg = ShadowRegs[FirstUnalloc];
+    MarkAllocated(Reg);
+    MarkAllocated(ShadowReg);
+    return Reg;
+  }
+
+  /// AllocateStack - Allocate a chunk of stack space with the specified size
+  /// and alignment, returning the offset of the chunk.
+  unsigned AllocateStack(unsigned Size, unsigned Align) {
+    assert(Align && ((Align-1) & Align) == 0); // Align is power of 2.
+    StackOffset = ((StackOffset + Align-1) & ~(Align-1));
+    unsigned Result = StackOffset;
+    StackOffset += Size;
+    return Result;
+  }
+
+  // HandleByVal - Allocate a stack slot large enough to pass an argument by
+  // value. The size and alignment information of the argument is encoded in its
+  // parameter attribute.
+  void HandleByVal(unsigned ValNo, EVT ValVT,
+                   EVT LocVT, CCValAssign::LocInfo LocInfo,
+                   int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags);
+
+private:
+  /// MarkAllocated - Mark a register and all of its aliases as allocated.
+  void MarkAllocated(unsigned Reg);
+};
+
+
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
new file mode 100644
index 0000000..cb73ae0
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -0,0 +1,184 @@
+//===--- HexagonExpandPredSpillCode.cpp - Expand Predicate Spill Code ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// The Hexagon processor has no instructions that load or store predicate
+// registers directly. So, when these registers must be spilled a general
+// purpose register must be found and the value copied to/from it from/to
+// the predicate register. This code currently does not use the register
+// scavenger mechanism available in the allocator. There are two registers
+// reserved to allow spilling/restoring predicate registers. One is used to
+// hold the predicate value. The other is used when stack frame offsets are
+// too large.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
+#include "HexagonMachineFunctionInfo.h"
+#include <map>
+#include <iostream>
+
+#include "llvm/Support/CommandLine.h"
+
+
+using namespace llvm;
+
+
+namespace {
+
+/// Machine-function pass that rewrites STriw_pred / LDriw_pred instructions.
+class HexagonExpandPredSpillCode : public MachineFunctionPass {
+  HexagonTargetMachine &QTM;   // Target machine supplied at construction.
+  const HexagonSubtarget &QST; // Subtarget obtained from QTM.
+
+public:
+  static char ID;
+  HexagonExpandPredSpillCode(HexagonTargetMachine &TM)
+    : MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {}
+  const char *getPassName() const {
+    return "Hexagon Expand Predicate Spill Code";
+  }
+  bool runOnMachineFunction(MachineFunction &Fn);
+};
+
+
+char HexagonExpandPredSpillCode::ID = 0; // Storage for the ID handed to MachineFunctionPass.
+
+
+bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
+
+ const HexagonInstrInfo *TII = QTM.getInstrInfo();
+ const HexagonRegisterInfo *RegInfo = QTM.getRegisterInfo();
+
+ // Replace every STriw_pred / LDriw_pred in Fn; loop over all of the basic blocks.
+ for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock* MBB = MBBb;
+ // Traverse the basic block; matched instructions are erased as we go.
+ for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
+ ++MII) {
+ MachineInstr *MI = MII;
+ int Opc = MI->getOpcode();
+ if (Opc == Hexagon::STriw_pred) {
+ // STriw_pred [R30], ofst, SrcReg;  (operands: frame reg, offset, predicate)
+ unsigned FP = MI->getOperand(0).getReg();
+ assert(FP == RegInfo->getFrameRegister() &&
+ "Not a Frame Pointer, Nor a Spill Slot");
+ assert(MI->getOperand(1).isImm() && "Not an offset");
+ int Offset = MI->getOperand(1).getImm();
+ int SrcReg = MI->getOperand(2).getReg();
+ assert(Hexagon::PredRegsRegClass.contains(SrcReg) &&
+ "Not a predicate register");
+ if (!TII->isValidOffset(Hexagon::STriw, Offset)) { // Offset rejected for STriw.
+ if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) { // ...and for ADD_ri.
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::CONST32_Int_Real),
+ HEXAGON_RESERVED_REG_1).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_rr),
+ HEXAGON_RESERVED_REG_1)
+ .addReg(FP).addReg(HEXAGON_RESERVED_REG_1);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd),
+ HEXAGON_RESERVED_REG_2).addReg(SrcReg);
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::STriw))
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0).addReg(HEXAGON_RESERVED_REG_2);
+ } else { // Offset accepted for ADD_ri: form the address with ADD_ri.
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri),
+ HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd),
+ HEXAGON_RESERVED_REG_2).addReg(SrcReg);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::STriw))
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0)
+ .addReg(HEXAGON_RESERVED_REG_2);
+ }
+ } else { // Offset accepted for STriw: store relative to FP directly.
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd),
+ HEXAGON_RESERVED_REG_2).addReg(SrcReg);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::STriw)).
+ addReg(FP).addImm(Offset).addReg(HEXAGON_RESERVED_REG_2);
+ }
+ MII = MBB->erase(MI); // Remove the original STriw_pred.
+ --MII; // Step back so the loop's ++MII resumes at what erase() returned.
+ } else if (Opc == Hexagon::LDriw_pred) {
+ // DstReg = LDriw_pred [R30], ofst.  (operands: predicate dest, frame reg, offset)
+ int DstReg = MI->getOperand(0).getReg();
+ assert(Hexagon::PredRegsRegClass.contains(DstReg) &&
+ "Not a predicate register");
+ unsigned FP = MI->getOperand(1).getReg();
+ assert(FP == RegInfo->getFrameRegister() &&
+ "Not a Frame Pointer, Nor a Spill Slot");
+ assert(MI->getOperand(2).isImm() && "Not an offset");
+ int Offset = MI->getOperand(2).getImm();
+ if (!TII->isValidOffset(Hexagon::LDriw, Offset)) { // Offset rejected for LDriw.
+ if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) { // ...and for ADD_ri.
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::CONST32_Int_Real),
+ HEXAGON_RESERVED_REG_1).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_rr),
+ HEXAGON_RESERVED_REG_1)
+ .addReg(FP)
+ .addReg(HEXAGON_RESERVED_REG_1);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw),
+ HEXAGON_RESERVED_REG_2)
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs),
+ DstReg).addReg(HEXAGON_RESERVED_REG_2);
+ } else { // Offset accepted for ADD_ri: form the address with ADD_ri.
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri),
+ HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw),
+ HEXAGON_RESERVED_REG_2)
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs),
+ DstReg).addReg(HEXAGON_RESERVED_REG_2);
+ }
+ } else { // Offset accepted for LDriw: load relative to FP directly.
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw),
+ HEXAGON_RESERVED_REG_2).addReg(FP).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs),
+ DstReg).addReg(HEXAGON_RESERVED_REG_2);
+ }
+ MII = MBB->erase(MI); // Remove the original LDriw_pred.
+ --MII; // Step back so the loop's ++MII resumes at what erase() returned.
+ }
+ }
+ }
+
+ return true; // Returned unconditionally, whether or not anything was rewritten.
+}
+
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonExpandPredSpillCode(HexagonTargetMachine &TM) {
+ return new HexagonExpandPredSpillCode(TM); // Heap-allocated; returned to the caller.
+}
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
new file mode 100644
index 0000000..78e0b1c
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -0,0 +1,333 @@
+//==-- HexagonFrameLowering.cpp - Define frame lowering --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//
+//===----------------------------------------------------------------------===//
+#include "Hexagon.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonFrameLowering.h"
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <iostream>
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Function.h"
+using namespace llvm;
+
+// Hidden command-line switch "disable-hexagon-dealloc-ret". When it is set,
+// emitEpilogue() does not fold the frame teardown into DEALLOC_RET_V4 and
+// emits a separate DEALLOCFRAME instead.
+static cl::opt<bool> DisableDeallocRet(
+ "disable-hexagon-dealloc-ret",
+ cl::Hidden,
+ cl::desc("Disable Dealloc Return for Hexagon target"));
+
+/// determineFrameLayout - Compute the final stack size of MF: the size already
+/// recorded in the frame info plus the largest outgoing call frame, rounded up
+/// to the target stack alignment. Both the stack size and the (possibly
+/// re-aligned) maximum call frame size are written back to MachineFrameInfo.
+void HexagonFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+  MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+
+  // Bytes requested so far, and the alignment the target asks for.
+  const unsigned LocalBytes = FrameInfo->getStackSize();
+  const unsigned StackAlign =
+    MF.getTarget().getFrameLowering()->getStackAlignment();
+
+  // With dynamic allocas the call frame area must itself be aligned so that
+  // the variable-sized allocations stay aligned.
+  unsigned CallFrameBytes = FrameInfo->getMaxCallFrameSize();
+  if (FrameInfo->hasVarSizedObjects())
+    CallFrameBytes = RoundUpToAlignment(CallFrameBytes, StackAlign);
+  FrameInfo->setMaxCallFrameSize(CallFrameBytes);
+
+  // Total frame = locals + call frame, aligned.
+  unsigned TotalBytes = LocalBytes + CallFrameBytes;
+  TotalBytes = RoundUpToAlignment(TotalBytes, StackAlign);
+  FrameInfo->setStackSize(TotalBytes);
+}
+
+
+/// emitPrologue - Finalize the frame layout of MF and insert the frame
+/// allocation code at the beginning of the entry block.
+///
+/// Steps, in order:
+///  - determineFrameLayout() fixes the final stack size;
+///  - every recorded ADJDYNALLOC instruction gets its immediate (operand 2)
+///    set to the maximum call frame size;
+///  - if frame moves are requested, three MachineMove entries are appended
+///    to the module's frame-move list;
+///  - if hasFP(MF), an ALLOCFRAME is emitted (see the overflow handling below).
+void HexagonFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ const HexagonRegisterInfo *QRI =
+ static_cast<const HexagonRegisterInfo *>(MF.getTarget().getRegisterInfo());
+ // Use the debug location of the first instruction, or an empty one when the
+ // entry block has no instructions.
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ determineFrameLayout(MF);
+
+ // Check if frame moves are needed for EH.
+ // NOTE(review): the second operand is negated, i.e. frame moves are
+ // requested when the function does NOT need an unwind table entry. This
+ // looks inverted relative to the comment above -- confirm the intent.
+ bool needsFrameMoves = MMI.hasDebugInfo() ||
+ !MF.getFunction()->needsUnwindTableEntry();
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ int NumBytes = (int) MFI->getStackSize();
+
+ // LLVM expects allocframe not to be the first instruction in the
+ // basic block.
+ // NOTE(review): the insertion point chosen here is nevertheless
+ // MBB.begin(); confirm how this matches the statement above.
+ MachineBasicBlock::iterator InsertPt = MBB.begin();
+
+ //
+ // ALLOCA adjust regs. Iterate over ADJDYNALLOC nodes and change the offset.
+ //
+ HexagonMachineFunctionInfo *FuncInfo =
+ MF.getInfo<HexagonMachineFunctionInfo>();
+ const std::vector<MachineInstr*>& AdjustRegs =
+ FuncInfo->getAllocaAdjustInsts();
+ for (std::vector<MachineInstr*>::const_iterator i = AdjustRegs.begin(),
+ e = AdjustRegs.end();
+ i != e; ++i) {
+ MachineInstr* MI = *i;
+ assert((MI->getOpcode() == Hexagon::ADJDYNALLOC) &&
+ "Expected adjust alloca node");
+
+ // Operand 2 is the immediate that is patched with the final maximum call
+ // frame size (known only after determineFrameLayout()).
+ MachineOperand& MO = MI->getOperand(2);
+ assert(MO.isImm() && "Expected immediate");
+ MO.setImm(MFI->getMaxCallFrameSize());
+ }
+
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+
+ if (needsFrameMoves) {
+ // Advance CFA. DW_CFA_def_cfa
+ unsigned FPReg = QRI->getFrameRegister();
+ unsigned RAReg = QRI->getRARegister();
+
+ // All three moves below are recorded with a label argument of 0.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(FPReg, -8);
+ Moves.push_back(MachineMove(0, Dst, Src));
+
+ // R31 = (R31 - #4)
+ MachineLocation LRDst(RAReg, -4);
+ MachineLocation LRSrc(RAReg);
+ Moves.push_back(MachineMove(0, LRDst, LRSrc));
+
+ // R30 = (R30 - #8)
+ MachineLocation SPDst(FPReg, -8);
+ MachineLocation SPSrc(FPReg);
+ Moves.push_back(MachineMove(0, SPDst, SPSrc));
+ }
+
+ //
+ // Only insert ALLOCFRAME if we need to.
+ //
+ if (hasFP(MF)) {
+ // Check for overflow.
+ // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
+ const int ALLOCFRAME_MAX = 16384;
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ if (NumBytes >= ALLOCFRAME_MAX) {
+ // Frame too large for the allocframe immediate: allocate an empty frame
+ // and adjust the stack register explicitly.
+ // Emit allocframe(#0).
+ BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::ALLOCFRAME)).addImm(0);
+
+ // Subtract the frame size from the stack register, using
+ // HEXAGON_RESERVED_REG_1 to hold the constant.
+ BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::CONST32_Int_Real),
+ HEXAGON_RESERVED_REG_1).addImm(NumBytes);
+ BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::SUB_rr),
+ QRI->getStackRegister()).
+ addReg(QRI->getStackRegister()).
+ addReg(HEXAGON_RESERVED_REG_1);
+ } else {
+ BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::ALLOCFRAME)).addImm(NumBytes);
+ }
+ }
+}
+// Returns true if the last non-debug instruction of MBB is a tail-call
+// return (TCRETURNtg or TCRETURNtext). Returns false for a block that
+// contains no non-debug instruction at all.
+bool HexagonFrameLowering::hasTailCall(MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+  // getLastNonDebugInstr() yields end() for an empty or debug-only block;
+  // dereferencing that iterator would be undefined behavior.
+  if (MBBI == MBB.end())
+    return false;
+
+  unsigned RetOpcode = MBBI->getOpcode();
+  return RetOpcode == Hexagon::TCRETURNtg || RetOpcode == Hexagon::TCRETURNtext;
+}
+
+/// emitEpilogue - Insert the frame teardown at the end of MBB. Nothing is
+/// emitted unless hasFP(MF). On subtargets with V4 ops, a trailing JMPR is
+/// replaced by DEALLOC_RET_V4 (unless DisableDeallocRet is set); otherwise a
+/// DEALLOCFRAME is inserted in front of the last instruction.
+void HexagonFrameLowering::emitEpilogue(MachineFunction &MF,
+                                        MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator LastInstr = prior(MBB.end());
+  DebugLoc Loc = LastInstr->getDebugLoc();
+
+  // Only insert deallocframe if we need to.
+  if (!hasFP(MF))
+    return;
+
+  // The stack size is passed as the immediate of the emitted instruction.
+  const int FrameBytes = (int) MF.getFrameInfo()->getStackSize();
+  const TargetInstrInfo &InstrInfo = *MF.getTarget().getInstrInfo();
+
+  const bool FoldIntoReturn = STI.hasV4TOps() &&
+                              LastInstr->getOpcode() == Hexagon::JMPR &&
+                              !DisableDeallocRet;
+  if (!FoldIntoReturn) {
+    // Keep the existing last instruction and put deallocframe before it.
+    BuildMI(MBB, LastInstr, Loc, InstrInfo.get(Hexagon::DEALLOCFRAME))
+      .addImm(FrameBytes);
+    return;
+  }
+
+  // Replace 'jumpr r31' with dealloc_return: drop the jump and append the
+  // combined instruction at the end of the block.
+  MBB.erase(LastInstr);
+  BuildMI(MBB, MBB.end(), Loc, InstrInfo.get(Hexagon::DEALLOC_RET_V4))
+    .addImm(FrameBytes);
+}
+
+/// hasFP - Return true when MF needs a frame: it makes calls, it has a
+/// non-zero stack size, or its function info reports that LR is clobbered.
+bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
+  const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+  const HexagonMachineFunctionInfo *HexagonInfo =
+    MF.getInfo<HexagonMachineFunctionInfo>();
+
+  if (FrameInfo->hasCalls())
+    return true;
+  if (FrameInfo->getStackSize() > 0)
+    return true;
+  return HexagonInfo->hasClobberLR();
+}
+
+/// spillCalleeSavedRegisters - Emit stores for the callee-saved registers in
+/// CSI before MI. Returns false (nothing emitted) when CSI is empty, and true
+/// after the stores have been emitted.
+///
+/// Two consecutive CSI entries that share the same super-register are stored
+/// with a single store of that super-register, using the frame index of the
+/// second entry. Once one entry cannot be paired, no later entry is paired.
+bool
+HexagonFrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction *MF = MBB.getParent();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+ if (CSI.empty()) {
+ return false;
+ }
+
+ // We can only use double-word stores if we spill contiguous callee-saved
+ // regs. For instance, we cannot use double-word stores if we spill r24,
+ // r26, and r27.
+ // Hexagon_TODO: We can try to double-word align odd registers for -O2 and
+ // above.
+ bool ContiguousRegs = true;
+
+ for (unsigned i = 0; i < CSI.size(); ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ //
+ // Check if we can use a double-word store.
+ //
+ const unsigned* SuperReg = TRI->getSuperRegisters(Reg);
+
+ // Assume that there is exactly one superreg.
+ assert(SuperReg[0] && !SuperReg[1] && "Expected exactly one superreg");
+ bool CanUseDblStore = false;
+ const TargetRegisterClass* SuperRegClass = 0;
+
+ // Pairing is only attempted while every earlier entry was paired and a
+ // next entry exists.
+ if (ContiguousRegs && (i < CSI.size()-1)) {
+ const unsigned* SuperRegNext = TRI->getSuperRegisters(CSI[i+1].getReg());
+ assert(SuperRegNext[0] && !SuperRegNext[1] &&
+ "Expected exactly one superreg");
+ SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg[0]);
+ CanUseDblStore = (SuperRegNext[0] == SuperReg[0]);
+ }
+
+
+ if (CanUseDblStore) {
+ // Store the shared super-register into the slot of the second entry.
+ TII.storeRegToStackSlot(MBB, MI, SuperReg[0], true,
+ CSI[i+1].getFrameIdx(), SuperRegClass, TRI);
+ MBB.addLiveIn(SuperReg[0]);
+ // Skip the partner entry; it is covered by the store above.
+ ++i;
+ } else {
+ // Cannot use a double-word store.
+ ContiguousRegs = false;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RC,
+ TRI);
+ MBB.addLiveIn(Reg);
+ }
+ }
+ return true;
+}
+
+
+/// restoreCalleeSavedRegisters - Emit loads that reload the callee-saved
+/// registers in CSI before MI. Returns false (nothing emitted) when CSI is
+/// empty, and true after the loads have been emitted.
+///
+/// Two consecutive CSI entries that share the same super-register are
+/// reloaded with a single load of that super-register from the frame index of
+/// the second entry. Once one entry cannot be paired, no later entry is
+/// paired.
+bool HexagonFrameLowering::restoreCalleeSavedRegisters(
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+
+ MachineFunction *MF = MBB.getParent();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+ if (CSI.empty()) {
+ return false;
+ }
+
+ // We can only use double-word loads if we spilled contiguous callee-saved
+ // regs. For instance, we cannot use double-word loads if we spilled r24,
+ // r26, and r27.
+ // Hexagon_TODO: We can try to double-word align odd registers for -O2 and
+ // above.
+ bool ContiguousRegs = true;
+
+ for (unsigned i = 0; i < CSI.size(); ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ //
+ // Check if we can use a double-word load.
+ //
+ const unsigned* SuperReg = TRI->getSuperRegisters(Reg);
+ const TargetRegisterClass* SuperRegClass = 0;
+
+ // Assume that there is exactly one superreg.
+ assert(SuperReg[0] && !SuperReg[1] && "Expected exactly one superreg");
+ bool CanUseDblLoad = false;
+ // Pairing is only attempted while every earlier entry was paired and a
+ // next entry exists.
+ if (ContiguousRegs && (i < CSI.size()-1)) {
+ const unsigned* SuperRegNext = TRI->getSuperRegisters(CSI[i+1].getReg());
+ assert(SuperRegNext[0] && !SuperRegNext[1] &&
+ "Expected exactly one superreg");
+ SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg[0]);
+ CanUseDblLoad = (SuperRegNext[0] == SuperReg[0]);
+ }
+
+
+ if (CanUseDblLoad) {
+ // Reload the shared super-register from the slot of the second entry.
+ TII.loadRegFromStackSlot(MBB, MI, SuperReg[0], CSI[i+1].getFrameIdx(),
+ SuperRegClass, TRI);
+ // NOTE(review): the restored register is added to the block's live-ins,
+ // mirroring the spill code; confirm this is intended for a reload.
+ MBB.addLiveIn(SuperReg[0]);
+ // Skip the partner entry; it is covered by the load above.
+ ++i;
+ } else {
+ // Cannot use a double-word load.
+ ContiguousRegs = false;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
+ MBB.addLiveIn(Reg);
+ }
+ }
+ return true;
+}
+
+/// getFrameIndexOffset - Return the object offset that the frame info of MF
+/// records for frame index FI.
+int HexagonFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+                                              int FI) const {
+  const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+  return FrameInfo->getObjectOffset(FI);
+}
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h
new file mode 100644
index 0000000..ad87f11
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -0,0 +1,50 @@
+//=- HexagonFrameLowering.h - Define frame lowering for Hexagon --*- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGON_FRAMEINFO_H
+#define HEXAGON_FRAMEINFO_H
+
+#include "Hexagon.h"
+#include "HexagonSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+/// HexagonFrameLowering - Frame lowering hooks for the Hexagon target:
+/// prologue/epilogue emission, callee-saved register spill/restore, and
+/// frame-index offset queries.
+class HexagonFrameLowering : public TargetFrameLowering {
+private:
+ // Subtarget this lowering was created for (held by reference).
+ const HexagonSubtarget &STI;
+ // Computes the final stack size and maximum call frame size for MF.
+ void determineFrameLayout(MachineFunction &MF) const;
+
+public:
+ // The stack grows down; the remaining base-class arguments are 8 and 0
+ // (presumably stack alignment and local-area offset -- confirm against the
+ // TargetFrameLowering constructor).
+ explicit HexagonFrameLowering(const HexagonSubtarget &sti)
+ : TargetFrameLowering(StackGrowsDown, 8, 0), STI(sti) {
+ }
+
+ /// emitPrologue/emitEpilogue - These methods insert prolog and epilog code
+ /// into the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ /// Emit stores of the callee-saved registers in CSI before MI. Returns
+ /// false when CSI is empty.
+ virtual bool
+ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ /// Emit reloads of the callee-saved registers in CSI before MI. Returns
+ /// false when CSI is empty.
+ virtual bool
+ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ /// Return the object offset recorded for frame index FI.
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+ /// Return true if MF needs a frame to be allocated.
+ bool hasFP(const MachineFunction &MF) const;
+ /// Return true if MBB ends in a tail-call return instruction.
+ bool hasTailCall(MachineBasicBlock &MBB) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
new file mode 100644
index 0000000..c1abc4a
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -0,0 +1,644 @@
+//===-- HexagonHardwareLoops.cpp - Identify and generate hardware loops ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass identifies loops where we can generate the Hexagon hardware
+// loop instruction. The hardware loop can perform loop branches with a
+// zero-cycle overhead.
+//
+// The pattern that defines the induction variable can change depending on
+// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
+// normalizes induction variables, and the Loop Strength Reduction pass
+// run by 'llc' may also make changes to the induction variable.
+// The pattern detected by this phase is due to running Strength Reduction.
+//
+// Criteria for hardware loops:
+// - Countable loops (w/ ind. var for a trip count)
+// - Assumes loops are normalized by IndVarSimplify
+// - Try inner-most loops first
+// - No nested hardware loops.
+// - No function calls in loops.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hwloops"
+#include "llvm/Constants.h"
+#include "llvm/PassSupport.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <algorithm>
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+
+using namespace llvm;
+
+STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
+
+namespace {
+ class CountValue;
+ // HexagonHardwareLoops - Machine function pass that looks for countable
+ // loops and rewrites them to use the LOOP0/ENDLOOP0 hardware loop
+ // instructions.
+ struct HexagonHardwareLoops : public MachineFunctionPass {
+ // Cached per-function state; assigned at the start of
+ // runOnMachineFunction().
+ MachineLoopInfo *MLI;
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+
+ HexagonHardwareLoops() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "Hexagon Hardware Loops"; }
+
+ // Requires and preserves the machine dominator tree and loop info, and
+ // declares the CFG preserved.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// getCanonicalInductionVariable - Check to see if the loop has a canonical
+ /// induction variable.
+ /// Should be defined in MachineLoop. Based upon version in class Loop.
+ const MachineInstr *getCanonicalInductionVariable(MachineLoop *L) const;
+
+ /// getTripCount - Return a loop-invariant LLVM register indicating the
+ /// number of times the loop will be executed. If the trip-count cannot
+ /// be determined, this returns null.
+ CountValue *getTripCount(MachineLoop *L) const;
+
+ /// isInductionOperation - Return true if the instruction matches the
+ /// pattern for an operation that defines an induction variable.
+ bool isInductionOperation(const MachineInstr *MI, unsigned IVReg) const;
+
+ /// isInvalidLoopOperation - Return true if the instruction is not valid
+ /// within a hardware loop.
+ bool isInvalidLoopOperation(const MachineInstr *MI) const;
+
+ /// containsInvalidInstruction - Return true if the loop contains an
+ /// instruction that inhibits using the hardware loop.
+ bool containsInvalidInstruction(MachineLoop *L) const;
+
+ /// convertToHardwareLoop - Given a loop, check if we can convert it to a
+ /// hardware loop. If so, then perform the conversion and return true.
+ bool convertToHardwareLoop(MachineLoop *L);
+
+ };
+
+ char HexagonHardwareLoops::ID = 0;
+
+
+ // CountValue class - Abstraction for a trip count of a loop. A
+ // smaller version of the MachineOperand class without the concerns
+ // of changing the operand representation.
+ //
+ // A CountValue is either a register number or an immediate. For an
+ // immediate, the sign is tracked separately in isNegative and getImm()
+ // returns the stored value negated when that flag is set. For a register,
+ // isNegative is whatever the creator passed in.
+ class CountValue {
+ public:
+   enum CountValueType {
+     CV_Register,
+     CV_Immediate
+   };
+ private:
+   CountValueType Kind;
+   union Values {
+     unsigned RegNum;
+     int64_t ImmVal;
+     Values(unsigned r) : RegNum(r) {}
+     Values(int64_t i) : ImmVal(i) {}
+   } Contents;
+   bool isNegative;
+
+ public:
+   CountValue(unsigned r, bool neg) : Kind(CV_Register), Contents(r),
+                                      isNegative(neg) {}
+   explicit CountValue(int64_t i) : Kind(CV_Immediate), Contents(i),
+                                    isNegative(i < 0) {}
+   CountValueType getType() const { return Kind; }
+   bool isReg() const { return Kind == CV_Register; }
+   bool isImm() const { return Kind == CV_Immediate; }
+   bool isNeg() const { return isNegative; }
+
+   unsigned getReg() const {
+     assert(isReg() && "Wrong CountValue accessor");
+     return Contents.RegNum;
+   }
+   void setReg(unsigned Val) {
+     // Writing the register member of an immediate value would corrupt it.
+     assert(isReg() && "Wrong CountValue accessor");
+     Contents.RegNum = Val;
+   }
+   int64_t getImm() const {
+     assert(isImm() && "Wrong CountValue accessor");
+     if (isNegative) {
+       return -Contents.ImmVal;
+     }
+     return Contents.ImmVal;
+   }
+   void setImm(int64_t Val) {
+     assert(isImm() && "Wrong CountValue accessor");
+     Contents.ImmVal = Val;
+     // Keep the sign flag in step with the stored value, exactly as the
+     // immediate constructor does; otherwise getImm() would negate (or fail
+     // to negate) based on the sign of the previous value.
+     isNegative = (Val < 0);
+   }
+
+   // Print the register (via PrintReg) or the immediate (via getImm()).
+   // TM is accepted for interface compatibility and is not used.
+   void print(raw_ostream &OS, const TargetMachine *TM = 0) const {
+     if (isReg()) { OS << PrintReg(getReg()); }
+     if (isImm()) { OS << getImm(); }
+   }
+ };
+
+ // HexagonFixupHwLoops - Machine function pass that revisits the LOOP0
+ // instructions and, when the loop start block is estimated to be too far
+ // away, replaces the instruction by explicit writes to LC0 and SA0.
+ struct HexagonFixupHwLoops : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+
+ HexagonFixupHwLoops() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "Hexagon Hardware Loop Fixup"; }
+
+ // Only declares the CFG preserved; no analyses are requested.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// Maximum distance between the loop instr and the basic block.
+ /// Just an estimate. fixupLoopInstrs() compares it against an offset
+ /// that advances by 4 per instruction.
+ static const unsigned MAX_LOOP_DISTANCE = 200;
+
+ /// fixupLoopInstrs - Check the offset between each loop instruction and
+ /// the loop basic block to determine if we can use the LOOP instruction
+ /// or if we need to set the LC/SA registers explicitly.
+ bool fixupLoopInstrs(MachineFunction &MF);
+
+ /// convertLoopInstr - Add the instruction to set the LC and SA registers
+ /// explicitly.
+ void convertLoopInstr(MachineFunction &MF,
+ MachineBasicBlock::iterator &MII,
+ RegScavenger &RS);
+
+ };
+
+ char HexagonFixupHwLoops::ID = 0;
+
+} // end anonymous namespace
+
+
+/// isHardwareLoop - Returns true if MI is one of the hardware loop setup
+/// instructions (LOOP0 with a register or an immediate trip count).
+static bool isHardwareLoop(const MachineInstr *MI) {
+  switch (MI->getOpcode()) {
+  case Hexagon::LOOP0_r:
+  case Hexagon::LOOP0_i:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// isCompareEqualsImm - Returns true if MI is the compare-equals instruction
+/// that takes an immediate operand (CMPEQri).
+static bool isCompareEqualsImm(const MachineInstr *MI) {
+  const unsigned Opcode = MI->getOpcode();
+  return Opcode == Hexagon::CMPEQri;
+}
+
+
+/// createHexagonHardwareLoops - Factory for the hardware loop conversion
+/// pass. Returns a newly allocated pass object.
+FunctionPass *llvm::createHexagonHardwareLoops() {
+  FunctionPass *LoopPass = new HexagonHardwareLoops();
+  return LoopPass;
+}
+
+
+/// runOnMachineFunction - Cache the loop info, register info and instruction
+/// info for MF, then try to convert every top-level loop (and, through
+/// convertToHardwareLoop, its sub-loops). Returns true if anything changed.
+bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
+
+  // State shared with the helper methods of this pass.
+  MLI = &getAnalysis<MachineLoopInfo>();
+  MRI = &MF.getRegInfo();
+  TII = MF.getTarget().getInstrInfo();
+
+  bool Modified = false;
+  for (MachineLoopInfo::iterator LoopIt = MLI->begin(), LoopEnd = MLI->end();
+       LoopIt != LoopEnd; ++LoopIt) {
+    MachineLoop *Candidate = *LoopIt;
+    // Loops that have a parent are handled while their parent is processed.
+    if (Candidate->getParentLoop())
+      continue;
+    if (convertToHardwareLoop(Candidate))
+      Modified = true;
+  }
+
+  return Modified;
+}
+
+/// getCanonicalInductionVariable - Check to see if the loop has a canonical
+/// induction variable. We check for a simple recurrence pattern - an
+/// integer recurrence that decrements by one each time through the loop and
+/// ends at zero. If so, return the phi node that corresponds to it.
+/// Returns null if no such phi is found or the loop header does not have
+/// exactly one incoming edge and one backedge.
+///
+/// NOTE(review): the code below only verifies that the backedge value is
+/// defined by an induction operation (see isInductionOperation); the step and
+/// end value are not checked here.
+///
+/// Based upon the similar code in LoopInfo except this code is specific to
+/// the machine.
+/// This method assumes that the IndVarSimplify pass has been run by 'opt'.
+///
+const MachineInstr
+*HexagonHardwareLoops::getCanonicalInductionVariable(MachineLoop *L) const {
+ MachineBasicBlock *TopMBB = L->getTopBlock();
+ MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
+ assert(PI != TopMBB->pred_end() &&
+ "Loop must have more than one incoming edge!");
+ // Take the first two predecessors; which one is the backedge is sorted out
+ // below.
+ MachineBasicBlock *Backedge = *PI++;
+ if (PI == TopMBB->pred_end()) return 0; // dead loop
+ MachineBasicBlock *Incoming = *PI++;
+ if (PI != TopMBB->pred_end()) return 0; // multiple backedges?
+
+ // make sure there is one incoming and one backedge and determine which
+ // is which.
+ if (L->contains(Incoming)) {
+ if (L->contains(Backedge))
+ return 0;
+ std::swap(Incoming, Backedge);
+ } else if (!L->contains(Backedge))
+ return 0;
+
+ // Loop over all of the PHI nodes, looking for a canonical induction variable:
+ // - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2".
+ // - The recurrence comes from the backedge.
+ // - the definition is an induction operation.
+ for (MachineBasicBlock::iterator I = TopMBB->begin(), E = TopMBB->end();
+ I != E && I->isPHI(); ++I) {
+ const MachineInstr *MPhi = &*I;
+ unsigned DefReg = MPhi->getOperand(0).getReg();
+ // PHI operands after the def come in (value register, block) pairs.
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
+ // Check each operand for the value from the backedge.
+ MachineBasicBlock *MBB = MPhi->getOperand(i+1).getMBB();
+ if (L->contains(MBB)) { // operands comes from the backedge
+ // Check if the definition is an induction operation.
+ const MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg());
+ if (isInductionOperation(DI, DefReg)) {
+ return MPhi;
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+/// getTripCount - Return a loop-invariant LLVM value indicating the
+/// number of times the loop will be executed. The trip count can
+/// be either a register or a constant value. If the trip-count
+/// cannot be determined, this returns null.
+///
+/// The returned CountValue is heap-allocated; the caller is responsible for
+/// deleting it.
+///
+/// We find the trip count from the phi instruction that defines the
+/// induction variable. We follow the links to the CMP instruction
+/// to get the trip count.
+///
+/// Based upon getTripCount in LoopInfo.
+///
+CountValue *HexagonHardwareLoops::getTripCount(MachineLoop *L) const {
+  // Check that the loop has a induction variable.
+  const MachineInstr *IV_Inst = getCanonicalInductionVariable(L);
+  if (IV_Inst == 0) return 0;
+
+  // Canonical loops will end with a 'cmpeq_ri IV, Imm',
+  //  if Imm is 0, the count is the initial value register of the PHI
+  //  (negated when the step is positive);
+  //  otherwise, if the initial value is a constant (TFRI), the count is
+  //  (Imm - initial value) / step.
+  const MachineOperand *IV_Opnd;
+  const MachineOperand *InitialValue;
+  // Operands 2 and 4 of the PHI are the blocks; whichever block is outside
+  // the loop supplies the initial value, the other one the recurrence.
+  if (!L->contains(IV_Inst->getOperand(2).getMBB())) {
+    InitialValue = &IV_Inst->getOperand(1);
+    IV_Opnd = &IV_Inst->getOperand(3);
+  } else {
+    InitialValue = &IV_Inst->getOperand(3);
+    IV_Opnd = &IV_Inst->getOperand(1);
+  }
+
+  // Look for the cmp instruction to determine if we
+  // can get a useful trip count.  The trip count can
+  // be either a register or an immediate.  The location
+  // of the value depends upon the type (reg or imm).
+  while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) {
+    const MachineInstr *MI = IV_Opnd->getParent();
+    if (L->contains(MI) && isCompareEqualsImm(MI)) {
+      const MachineOperand &MO = MI->getOperand(2);
+      assert(MO.isImm() && "IV Cmp Operand should be 0");
+      int64_t ImmVal = MO.getImm();
+
+      const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg());
+      assert(L->contains(IV_DefInstr->getParent()) &&
+             "IV definition should occurs in loop");
+      // Step of the induction variable (immediate of the defining add).
+      int64_t iv_value = IV_DefInstr->getOperand(2).getImm();
+
+      if (ImmVal == 0) {
+        // Make sure the induction variable changes by one on each iteration.
+        if (iv_value != 1 && iv_value != -1) {
+          return 0;
+        }
+        return new CountValue(InitialValue->getReg(), iv_value > 0);
+      } else {
+        assert(InitialValue->isReg() && "Expecting register for init value");
+        const MachineInstr *DefInstr = MRI->getVRegDef(InitialValue->getReg());
+        if (DefInstr && DefInstr->getOpcode() == Hexagon::TFRI) {
+          int64_t count = ImmVal - DefInstr->getOperand(1).getImm();
+          // A zero step gives no usable trip count, and would make the
+          // modulus/division below undefined.
+          if (iv_value == 0 || (count % iv_value) != 0) {
+            return 0;
+          }
+          return new CountValue(count/iv_value);
+        }
+      }
+    }
+  }
+  return 0;
+}
+
+/// isInductionOperation - Return true if MI matches the pattern that defines
+/// an induction variable, i.e. an add-immediate whose register source
+/// (operand 1) is IVReg:
+///   add iv, c
+///
+bool
+HexagonHardwareLoops::isInductionOperation(const MachineInstr *MI,
+                                           unsigned IVReg) const {
+  if (MI->getOpcode() != Hexagon::ADD_ri)
+    return false;
+  return MI->getOperand(1).getReg() == IVReg;
+}
+
+/// isInvalidLoopOperation - Return true if the operation is invalid within
+/// a hardware loop: a call, another hardware loop instruction, or any
+/// instruction that defines one of the hardware loop registers
+/// (LC0, LC1, SA0, SA1).
+bool
+HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI) const {
+
+  // call is not allowed because the callee may use a hardware loop
+  if (MI->getDesc().isCall()) {
+    return true;
+  }
+  // do not allow nested hardware loops
+  if (isHardwareLoop(MI)) {
+    return true;
+  }
+  // check if the instruction defines a hardware loop register
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || !MO.isDef()) {
+      continue;
+    }
+    // The original test compared against SA0 twice and so never caught a
+    // definition of SA1.
+    const unsigned Reg = MO.getReg();
+    if (Reg == Hexagon::LC0 || Reg == Hexagon::LC1 ||
+        Reg == Hexagon::SA0 || Reg == Hexagon::SA1) {
+      return true;
+    }
+  }
+  return false;
+}
+
+/// containsInvalidInstruction - Return true if any instruction in any block
+/// of the loop inhibits the use of the hardware loop function (see
+/// isInvalidLoopOperation).
+///
+bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const {
+  // Bind by reference: the block list is only read, so there is no need to
+  // copy the whole vector for every loop that is examined.
+  const std::vector<MachineBasicBlock*> &Blocks = L->getBlocks();
+  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+    MachineBasicBlock *MBB = Blocks[i];
+    for (MachineBasicBlock::iterator
+           MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
+      const MachineInstr *MI = &*MII;
+      if (isInvalidLoopOperation(MI)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+/// convertToHardwareLoop - check if the loop is a candidate for
+/// converting to a hardware loop.  If so, then perform the
+/// transformation.
+///
+/// This function works on innermost loops first.  A loop can
+/// be converted if it is a counting loop; either a register
+/// value or an immediate.
+///
+/// Returns true if this loop or one of its sub-loops was converted. A loop
+/// whose sub-loop was converted is itself left alone.
+///
+/// The code makes several assumptions about the representation
+/// of the loop in llvm.
+bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
+  bool Changed = false;
+  // Process nested loops first.
+  for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
+    Changed |= convertToHardwareLoop(*I);
+  }
+  // If a nested loop has been converted, then we can't convert this loop.
+  if (Changed) {
+    return Changed;
+  }
+
+  // All checks that can reject the loop are done before the trip count is
+  // computed: getTripCount() returns a heap-allocated object, and bailing out
+  // after obtaining it would leak that object.
+
+  // Does the loop contain any invalid instructions?
+  if (containsInvalidInstruction(L)) {
+    return false;
+  }
+  MachineBasicBlock *Preheader = L->getLoopPreheader();
+  // No preheader means there's not place for the loop instr.
+  if (Preheader == 0) {
+    return false;
+  }
+  MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
+  // A preheader that falls through has no terminator, in which case
+  // InsertPos is end() and must not be dereferenced.
+  DebugLoc PreheaderDL;
+  if (InsertPos != Preheader->end()) {
+    PreheaderDL = InsertPos->getDebugLoc();
+  }
+
+  MachineBasicBlock *LastMBB = L->getExitingBlock();
+  // Don't generate hw loop if the loop has more than one exit.
+  if (LastMBB == 0) {
+    return false;
+  }
+  MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
+  // Without a terminator there is no loop branch to replace.
+  if (LastI == LastMBB->end()) {
+    return false;
+  }
+
+  // Determine the loop start.
+  MachineBasicBlock *LoopStart = L->getTopBlock();
+  if (L->getLoopLatch() != LastMBB) {
+    // When the exit and latch are not the same, use the latch block as the
+    // start.
+    // The loop start address is used only after the 1st iteration, and the loop
+    // latch may contains instrs. that need to be executed after the 1st iter.
+    LoopStart = L->getLoopLatch();
+    // Make sure the latch is a successor of the exit, otherwise it won't work.
+    if (!LastMBB->isSuccessor(LoopStart)) {
+      return false;
+    }
+  }
+
+  // Are we able to determine the trip count for the loop?
+  CountValue *TripCount = getTripCount(L);
+  if (TripCount == 0) {
+    return false;
+  }
+
+  // Convert the loop to a hardware loop
+  DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
+
+  if (TripCount->isReg()) {
+    // Create a copy of the loop count register.
+    MachineFunction *MF = LastMBB->getParent();
+    const TargetRegisterClass *RC =
+      MF->getRegInfo().getRegClass(TripCount->getReg());
+    unsigned CountReg = MF->getRegInfo().createVirtualRegister(RC);
+    BuildMI(*Preheader, InsertPos, PreheaderDL,
+            TII->get(TargetOpcode::COPY), CountReg).addReg(TripCount->getReg());
+    if (TripCount->isNeg()) {
+      // The register holds the negated count; flip its sign.
+      unsigned CountReg1 = CountReg;
+      CountReg = MF->getRegInfo().createVirtualRegister(RC);
+      BuildMI(*Preheader, InsertPos, PreheaderDL,
+              TII->get(Hexagon::NEG), CountReg).addReg(CountReg1);
+    }
+
+    // Add the Loop instruction to the beginning of the loop.
+    BuildMI(*Preheader, InsertPos, PreheaderDL,
+            TII->get(Hexagon::LOOP0_r)).addMBB(LoopStart).addReg(CountReg);
+  } else {
+    assert(TripCount->isImm() && "Expecting immedate vaule for trip count");
+    // Add the Loop immediate instruction to the beginning of the loop.
+    int64_t CountImm = TripCount->getImm();
+    BuildMI(*Preheader, InsertPos, PreheaderDL,
+            TII->get(Hexagon::LOOP0_i)).addMBB(LoopStart).addImm(CountImm);
+  }
+
+  // Make sure the loop start always has a reference in the CFG.  We need to
+  // create a BlockAddress operand to get this mechanism to work both the
+  // MachineBasicBlock and BasicBlock objects need the flag set.
+  LoopStart->setHasAddressTaken();
+  // This line is needed to set the hasAddressTaken flag on the BasicBlock
+  // object
+  BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
+
+  // Replace the loop branch with an endloop instruction.
+  DebugLoc dl = LastI->getDebugLoc();
+  BuildMI(*LastMBB, LastI, dl, TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart);
+
+  // The loop ends with either:
+  //  - a conditional branch followed by an unconditional branch, or
+  //  - a conditional branch to the loop start.
+  if (LastI->getOpcode() == Hexagon::JMP_Pred ||
+      LastI->getOpcode() == Hexagon::JMP_PredNot) {
+    // delete one and change/add an uncond. branch to out of the loop
+    MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
+    LastI = LastMBB->erase(LastI);
+    if (!L->contains(BranchTarget)) {
+      if (LastI != LastMBB->end()) {
+        TII->RemoveBranch(*LastMBB);
+      }
+      SmallVector<MachineOperand, 0> Cond;
+      TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, dl);
+    }
+  } else {
+    // Conditional branch to loop start; just delete it.
+    LastMBB->erase(LastI);
+  }
+  delete TripCount;
+
+  ++NumHWLoops;
+  return true;
+}
+
+/// createHexagonFixupHwLoops - Factory for the hardware loop fixup pass.
+/// Returns a newly allocated pass object.
+FunctionPass *llvm::createHexagonFixupHwLoops() {
+  FunctionPass *FixupPass = new HexagonFixupHwLoops();
+  return FixupPass;
+}
+
+/// runOnMachineFunction - Run the loop instruction fixup over MF and report
+/// whether any instruction was rewritten.
+bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG(dbgs() << "****** Hexagon Hardware Loop Fixup ******\n");
+
+  return fixupLoopInstrs(MF);
+}
+
+/// fixupLoopInstrs - For Hexagon, if the loop label is too far from the
+/// loop instruction then we need to set the LC0 and SA0 registers
+/// explicitly instead of using LOOP(start,count). This function
+/// checks the distance, and generates register assignments if needed.
+/// Returns true if at least one loop instruction was converted.
+///
+/// This function makes two passes over the basic blocks. The first
+/// pass computes the offset of the basic block from the start.
+/// The second pass checks all the loop instructions.
+///
+/// Offsets are an estimate: every instruction is counted as 4.
+bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
+
+ // Offset of the current instruction from the start.
+ unsigned InstOffset = 0;
+ // Map for each basic block to its first instruction.
+ DenseMap<MachineBasicBlock*, unsigned> BlockToInstOffset;
+
+ // First pass - compute the offset of each basic block.
+ for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
+ MBB != MBBe; ++MBB) {
+ BlockToInstOffset[MBB] = InstOffset;
+ InstOffset += (MBB->size() * 4);
+ }
+
+ // Second pass - check each loop instruction to see if it needs to
+ // be converted.
+ // Note that the block offsets computed above are not updated when a loop
+ // instruction is replaced by a longer sequence.
+ InstOffset = 0;
+ bool Changed = false;
+ RegScavenger RS;
+
+ // Loop over all the basic blocks.
+ for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
+ MBB != MBBe; ++MBB) {
+ InstOffset = BlockToInstOffset[MBB];
+ RS.enterBasicBlock(MBB);
+
+ // Loop over all the instructions.
+ MachineBasicBlock::iterator MIE = MBB->end();
+ MachineBasicBlock::iterator MII = MBB->begin();
+ while (MII != MIE) {
+ if (isHardwareLoop(MII)) {
+ // Bring the scavenger up to this instruction so that a scratch register
+ // can be requested at this point.
+ RS.forward(MII);
+ assert(MII->getOperand(0).isMBB() &&
+ "Expect a basic block as loop operand");
+ // Absolute distance between this instruction and the loop start block.
+ int diff = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()];
+ diff = (diff > 0 ? diff : -diff);
+ if ((unsigned)diff > MAX_LOOP_DISTANCE) {
+ // Convert to explicitly setting LC0 and SA0.
+ convertLoopInstr(MF, MII, RS);
+ // The replacement sequence was inserted before MII; drop the original
+ // loop instruction and continue with its successor.
+ MII = MBB->erase(MII);
+ Changed = true;
+ } else {
+ ++MII;
+ }
+ } else {
+ ++MII;
+ }
+ // One instruction was visited, whether or not it was replaced.
+ InstOffset += 4;
+ }
+ }
+
+ return Changed;
+
+}
+
+/// convertLoopInstr - convert a loop instruction to a sequence of instructions
+/// that set the lc and sa register explicitly.
+///
+/// The new instructions are inserted before MII. MII itself is not removed
+/// here; fixupLoopInstrs() erases it after this call. Operand 0 of MII is
+/// the loop start block and operand 1 is the trip count (register or
+/// immediate). A scratch register is obtained from RS.
+void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF,
+ MachineBasicBlock::iterator &MII,
+ RegScavenger &RS) {
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ MachineBasicBlock *MBB = MII->getParent();
+ DebugLoc DL = MII->getDebugLoc();
+ // Scratch is used for the immediate trip count (if any) and for the loop
+ // start address.
+ unsigned Scratch = RS.scavengeRegister(Hexagon::IntRegsRegisterClass, MII, 0);
+
+ // First, set the LC0 with the trip count.
+ if (MII->getOperand(1).isReg()) {
+ // Trip count is a register
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
+ .addReg(MII->getOperand(1).getReg());
+ } else {
+ // Trip count is an immediate.
+ // It is first materialized in Scratch and then transferred to LC0.
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFRI), Scratch)
+ .addImm(MII->getOperand(1).getImm());
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
+ .addReg(Scratch);
+ }
+ // Then, set the SA0 with the loop start address.
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::CONST32_Label), Scratch)
+ .addMBB(MII->getOperand(0).getMBB());
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::SA0).addReg(Scratch);
+}
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
new file mode 100644
index 0000000..4deab9f
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -0,0 +1,1495 @@
+//==-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon ----==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the Hexagon target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-isel"
+#include "HexagonISelLowering.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===--------------------------------------------------------------------===//
+/// HexagonDAGToDAGISel - Hexagon specific code to select Hexagon machine
+/// instructions for SelectionDAG operations.
+///
+/// The class declares hand-written selectors (SelectLoad, SelectStore,
+/// SelectMul, ...) next to the matcher pulled in from HexagonGenDAGISel.inc.
+///
+namespace {
+class HexagonDAGToDAGISel : public SelectionDAGISel {
+ /// Subtarget - Keep a reference to the Hexagon Subtarget around so that we
+ /// can make the right decision when generating code for different targets.
+ const HexagonSubtarget &Subtarget;
+
+ // Keep a reference to HexagonTargetMachine.
+ HexagonTargetMachine& TM;
+ // Hexagon instruction info, obtained from TM in the constructor.
+ const HexagonInstrInfo *TII;
+
+public:
+ // Caches the subtarget, the target machine and its instruction info.
+ explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine)
+ : SelectionDAGISel(targetmachine),
+ Subtarget(targetmachine.getSubtarget<HexagonSubtarget>()),
+ TM(targetmachine),
+ TII(static_cast<const HexagonInstrInfo*>(TM.getInstrInfo())) {
+
+ }
+
+ // Main selection entry point for a single node.
+ SDNode *Select(SDNode *N);
+
+ // Complex Pattern Selectors.
+ bool SelectADDRri(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriS11_0(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriS11_1(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriS11_2(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectMEMriS11_2(SDValue& Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRriS11_3(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRrr(SDValue &Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRriU6_0(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriU6_1(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriU6_2(SDValue& N, SDValue &R1, SDValue &R2);
+
+ virtual const char *getPassName() const {
+ return "Hexagon DAG->DAG Pattern Instruction Selection";
+ }
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+ bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset);
+
+ // Hand-written selectors for individual node kinds.
+ SDNode *SelectLoad(SDNode *N);
+ SDNode *SelectBaseOffsetLoad(LoadSDNode *LD, DebugLoc dl);
+ SDNode *SelectIndexedLoad(LoadSDNode *LD, DebugLoc dl);
+ SDNode *SelectIndexedLoadZeroExtend64(LoadSDNode *LD, unsigned Opcode,
+ DebugLoc dl);
+ SDNode *SelectIndexedLoadSignExtend64(LoadSDNode *LD, unsigned Opcode,
+ DebugLoc dl);
+ SDNode *SelectBaseOffsetStore(StoreSDNode *ST, DebugLoc dl);
+ SDNode *SelectIndexedStore(StoreSDNode *ST, DebugLoc dl);
+ SDNode *SelectStore(SDNode *N);
+ SDNode *SelectSHL(SDNode *N);
+ SDNode *SelectSelect(SDNode *N);
+ SDNode *SelectTruncate(SDNode *N);
+ SDNode *SelectMul(SDNode *N);
+ SDNode *SelectZeroExtend(SDNode *N);
+ SDNode *SelectIntrinsicWOChain(SDNode *N);
+ SDNode *SelectConstant(SDNode *N);
+ SDNode *SelectAdd(SDNode *N);
+
+ // Include the pieces autogenerated from the target description.
+#include "HexagonGenDAGISel.inc"
+};
+} // end anonymous namespace
+
+
+/// createHexagonISelDag - Factory for the Hexagon instruction selection pass,
+/// which converts a legalized DAG into a Hexagon-specific DAG, ready for
+/// instruction scheduling.  The returned pass is allocated with new.
+///
+FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM) {
+  FunctionPass *ISelPass = new HexagonDAGToDAGISel(TM);
+  return ISelPass;
+}
+
+// s11:0 predicate - True if the constant node's sign-extended value fits in
+// an 11-bit signed field.  S must be a ConstantSDNode.
+static bool IsS11_0_Offset(SDNode * S) {
+  const int64_t Imm = (int64_t)cast<ConstantSDNode>(S)->getSExtValue();
+  return isInt<11>(Imm);
+}
+
+
+// s11:1 predicate - True if the constant node's sign-extended value passes
+// isShiftedInt<11,1>, i.e. an 11-bit signed field shifted left by 1.
+// S must be a ConstantSDNode.
+static bool IsS11_1_Offset(SDNode * S) {
+  const int64_t Imm = (int64_t)cast<ConstantSDNode>(S)->getSExtValue();
+  return isShiftedInt<11,1>(Imm);
+}
+
+
+// s11:2 predicate - True if the constant node's sign-extended value passes
+// isShiftedInt<11,2>, i.e. an 11-bit signed field shifted left by 2.
+// S must be a ConstantSDNode.
+static bool IsS11_2_Offset(SDNode * S) {
+  const int64_t Imm = (int64_t)cast<ConstantSDNode>(S)->getSExtValue();
+  return isShiftedInt<11,2>(Imm);
+}
+
+
+// s11:3 predicate - True if the constant node's sign-extended value passes
+// isShiftedInt<11,3>, i.e. an 11-bit signed field shifted left by 3.
+// S must be a ConstantSDNode.
+static bool IsS11_3_Offset(SDNode * S) {
+  const int64_t Imm = (int64_t)cast<ConstantSDNode>(S)->getSExtValue();
+  return isShiftedInt<11,3>(Imm);
+}
+
+
+// u6:0 predicate - True if the constant node's value fits in a 6-bit
+// unsigned field.  S must be a ConstantSDNode.
+static bool IsU6_0_Offset(SDNode * S) {
+  const int64_t Imm = (int64_t)cast<ConstantSDNode>(S)->getSExtValue();
+  return isUInt<6>(Imm);
+}
+
+
+// u6:1 predicate - True if the constant node's value passes
+// isShiftedUInt<6,1>, i.e. a 6-bit unsigned field shifted left by 1.
+// S must be a ConstantSDNode.
+static bool IsU6_1_Offset(SDNode * S) {
+  const int64_t Imm = (int64_t)cast<ConstantSDNode>(S)->getSExtValue();
+  return isShiftedUInt<6,1>(Imm);
+}
+
+
+// u6:2 predicate - True if the constant node's value passes
+// isShiftedUInt<6,2>, i.e. a 6-bit unsigned field shifted left by 2.
+// S must be a ConstantSDNode.
+static bool IsU6_2_Offset(SDNode * S) {
+  const int64_t Imm = (int64_t)cast<ConstantSDNode>(S)->getSExtValue();
+  return isShiftedUInt<6,2>(Imm);
+}
+
+
+// Intrinsics that return a predicate.
+// Returns 1 if ID is one of the intrinsics listed below, 0 otherwise.
+static unsigned doesIntrinsicReturnPredicate(unsigned ID) {
+  switch (ID) {
+  case Intrinsic::hexagon_C2_cmpeq:
+  case Intrinsic::hexagon_C2_cmpgt:
+  case Intrinsic::hexagon_C2_cmpgtu:
+  case Intrinsic::hexagon_C2_cmpgtup:
+  case Intrinsic::hexagon_C2_cmpgtp:
+  case Intrinsic::hexagon_C2_cmpeqp:
+  case Intrinsic::hexagon_C2_bitsset:
+  case Intrinsic::hexagon_C2_bitsclr:
+  case Intrinsic::hexagon_C2_cmpeqi:
+  case Intrinsic::hexagon_C2_cmpgti:
+  case Intrinsic::hexagon_C2_cmpgtui:
+  case Intrinsic::hexagon_C2_cmpgei:
+  case Intrinsic::hexagon_C2_cmpgeui:
+  case Intrinsic::hexagon_C2_cmplt:
+  case Intrinsic::hexagon_C2_cmpltu:
+  case Intrinsic::hexagon_C2_bitsclri:
+  case Intrinsic::hexagon_C2_and:
+  case Intrinsic::hexagon_C2_or:
+  case Intrinsic::hexagon_C2_xor:
+  case Intrinsic::hexagon_C2_andn:
+  case Intrinsic::hexagon_C2_not:
+  case Intrinsic::hexagon_C2_orn:
+  case Intrinsic::hexagon_C2_pxfer_map:
+  case Intrinsic::hexagon_C2_any8:
+  case Intrinsic::hexagon_C2_all8:
+  case Intrinsic::hexagon_A2_vcmpbeq:
+  case Intrinsic::hexagon_A2_vcmpbgtu:
+  case Intrinsic::hexagon_A2_vcmpheq:
+  case Intrinsic::hexagon_A2_vcmphgt:
+  case Intrinsic::hexagon_A2_vcmphgtu:
+  case Intrinsic::hexagon_A2_vcmpweq:
+  case Intrinsic::hexagon_A2_vcmpwgt:
+  case Intrinsic::hexagon_A2_vcmpwgtu:
+  case Intrinsic::hexagon_C2_tfrrp:
+  case Intrinsic::hexagon_S2_tstbit_i:
+  case Intrinsic::hexagon_S2_tstbit_r:
+    return 1;
+  default:
+    break;
+  }
+  return 0;
+}
+
+
+// Intrinsics that have predicate operands.
+// Returns the Hexagon opcode associated with the intrinsic ID, or 0 when ID
+// is not one of the intrinsics listed below.
+static unsigned doesIntrinsicContainPredicate(unsigned ID) {
+  unsigned MappedOpcode = 0;
+
+  switch (ID) {
+  case Intrinsic::hexagon_C2_tfrpr:
+    MappedOpcode = Hexagon::TFR_RsPd;
+    break;
+  case Intrinsic::hexagon_C2_and:
+    MappedOpcode = Hexagon::AND_pp;
+    break;
+  case Intrinsic::hexagon_C2_xor:
+    MappedOpcode = Hexagon::XOR_pp;
+    break;
+  case Intrinsic::hexagon_C2_or:
+    MappedOpcode = Hexagon::OR_pp;
+    break;
+  case Intrinsic::hexagon_C2_not:
+    MappedOpcode = Hexagon::NOT_pp;
+    break;
+  case Intrinsic::hexagon_C2_any8:
+    MappedOpcode = Hexagon::ANY_pp;
+    break;
+  case Intrinsic::hexagon_C2_all8:
+    MappedOpcode = Hexagon::ALL_pp;
+    break;
+  case Intrinsic::hexagon_C2_vitpack:
+    MappedOpcode = Hexagon::VITPACK_pp;
+    break;
+  case Intrinsic::hexagon_C2_mask:
+    MappedOpcode = Hexagon::MASK_p;
+    break;
+  case Intrinsic::hexagon_C2_mux:
+    MappedOpcode = Hexagon::MUX_rr;
+    break;
+
+  // Mapping hexagon_C2_muxir to MUX_pri.  This is pretty weird - but
+  // that's how it's mapped in q6protos.h.
+  case Intrinsic::hexagon_C2_muxir:
+    MappedOpcode = Hexagon::MUX_ri;
+    break;
+
+  // Mapping hexagon_C2_muxri to MUX_pir.  This is pretty weird - but
+  // that's how it's mapped in q6protos.h.
+  case Intrinsic::hexagon_C2_muxri:
+    MappedOpcode = Hexagon::MUX_ir;
+    break;
+
+  case Intrinsic::hexagon_C2_muxii:
+    MappedOpcode = Hexagon::MUX_ii;
+    break;
+  case Intrinsic::hexagon_C2_vmux:
+    MappedOpcode = Hexagon::VMUX_prr64;
+    break;
+  case Intrinsic::hexagon_S2_valignrb:
+    MappedOpcode = Hexagon::VALIGN_rrp;
+    break;
+  case Intrinsic::hexagon_S2_vsplicerb:
+    MappedOpcode = Hexagon::VSPLICE_rrp;
+    break;
+  default:
+    break;
+  }
+
+  return MappedOpcode;
+}
+
+
+// Returns true if Offset is encodable for a memory access of type MemType:
+// it must pass isShiftedInt<11,N>, where N is 3/2/1 for i64/i32/i16, or
+// isInt<11> for i8.  Any other memory type yields false.
+static bool OffsetFitsS11(EVT MemType, int64_t Offset) {
+  if (MemType == MVT::i64)
+    return isShiftedInt<11,3>(Offset);
+  if (MemType == MVT::i32)
+    return isShiftedInt<11,2>(Offset);
+  if (MemType == MVT::i16)
+    return isShiftedInt<11,1>(Offset);
+  if (MemType == MVT::i8)
+    return isInt<11>(Offset);
+  return false;
+}
+
+
+//
+// Try to lower loads of GlobalAddresses into base+offset loads.  Custom
+// lowering for GlobalAddress nodes has already turned it into a
+// CONST32.
+//
+// When the load is a normal load from a CONST32 of a TargetGlobalAddress
+// whose offset is non-zero and accepted by OffsetFitsS11 for the loaded
+// type, the address is rebuilt as CONST32_set of the global with offset 0
+// and the offset becomes the immediate of an LDri*_indexed instruction.
+// Every other case is handed to the generated matcher (SelectCode).
+//
+SDNode *HexagonDAGToDAGISel::SelectBaseOffsetLoad(LoadSDNode *LD, DebugLoc dl) {
+  EVT LoadedVT = LD->getMemoryVT();
+  SDValue Chain = LD->getChain();
+  SDNode* Const32 = LD->getBasePtr().getNode();
+  unsigned Opcode = 0;
+
+  if (Const32->getOpcode() == HexagonISD::CONST32 &&
+      ISD::isNormalLoad(LD)) {
+    SDValue Base = Const32->getOperand(0);
+    // Check the operand kind before the cast<> below, the same way
+    // SelectBaseOffsetStore does; anything else goes to SelectCode.
+    if (Base.getOpcode() == ISD::TargetGlobalAddress) {
+      int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset();
+      if (Offset != 0 && OffsetFitsS11(LoadedVT, Offset)) {
+        MVT PointerTy = TLI.getPointerTy();
+        const GlobalValue* GV =
+          cast<GlobalAddressSDNode>(Base)->getGlobal();
+        // New base: the same global, with the offset stripped.
+        SDValue TargAddr =
+          CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0);
+        SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set,
+                                                 dl, PointerTy,
+                                                 TargAddr);
+        // Figure out base + offset opcode
+        if (LoadedVT == MVT::i64) Opcode = Hexagon::LDrid_indexed;
+        else if (LoadedVT == MVT::i32) Opcode = Hexagon::LDriw_indexed;
+        else if (LoadedVT == MVT::i16) Opcode = Hexagon::LDrih_indexed;
+        else if (LoadedVT == MVT::i8) Opcode = Hexagon::LDrib_indexed;
+        else assert (0 && "unknown memory type");
+
+        // Build indexed load.
+        SDValue TargetConstOff = CurDAG->getTargetConstant(Offset, PointerTy);
+        SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
+                                                LD->getValueType(0),
+                                                MVT::Other,
+                                                SDValue(NewBase,0),
+                                                TargetConstOff,
+                                                Chain);
+        // Carry the memory operand over to the new machine node.
+        MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+        MemOp[0] = LD->getMemOperand();
+        cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+        ReplaceUses(LD, Result);
+        return Result;
+      }
+    }
+  }
+
+  return SelectCode(LD);
+}
+
+
+//
+// Select an indexed load whose i64 result is a sign extension of the loaded
+// value.  Opcode is the 32-bit load opcode chosen by the caller.  The loaded
+// value is widened with SXTW.  When the increment is not a valid
+// auto-increment immediate, the load is emitted with offset 0 and a separate
+// ADD_ri produces the updated address.  Falls back to SelectCode when the
+// address operand does not match SelectADDRriS11_2 or is not i32.
+//
+SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD,
+ unsigned Opcode,
+ DebugLoc dl)
+{
+ SDValue Chain = LD->getChain();
+ EVT LoadedVT = LD->getMemoryVT();
+ SDValue Base = LD->getBasePtr();
+ SDValue Offset = LD->getOffset();
+ SDNode *OffsetNode = Offset.getNode();
+ // The increment must be a constant; cast<> asserts otherwise.
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+ SDValue N1 = LD->getOperand(1);
+ SDValue CPTmpN1_0;
+ SDValue CPTmpN1_1;
+ if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) &&
+ N1.getNode()->getValueType(0) == MVT::i32) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val)) {
+ // Auto-increment form: one load node yields the value, the updated
+ // address and the chain.
+ SDValue TargetConst = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32,
+ MVT::Other, Base, TargetConst,
+ Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::SXTW, dl, MVT::i64,
+ SDValue(Result_1, 0));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ // The three results of LD are redirected to the extended value and to
+ // results 1 and 2 of the load node.
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_2, 0),
+ SDValue(Result_1, 1),
+ SDValue(Result_1, 2)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_2;
+ }
+ // Split form: load at offset 0, extend, then add the increment to the base
+ // with ADD_ri (which also takes the load's result 1 as an operand).
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::Other, Base, TargetConst0,
+ Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::SXTW, dl,
+ MVT::i64, SDValue(Result_1, 0));
+ SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl,
+ MVT::i32, Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_2, 0),
+ SDValue(Result_3, 0),
+ SDValue(Result_1, 1)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_2;
+ }
+ return SelectCode(LD);
+}
+
+
+//
+// Select an indexed load whose i64 result is a zero extension of the loaded
+// value.  Opcode is the load opcode chosen by the caller.  The 64-bit value
+// is formed with COMBINE_rr from a TFRI of 0 and the loaded 32-bit value.
+// When the increment is not a valid auto-increment immediate, the load is
+// emitted with offset 0 and a separate ADD_ri produces the updated address.
+// Falls back to SelectCode when the address operand does not match
+// SelectADDRriS11_2 or is not i32.
+//
+SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD,
+ unsigned Opcode,
+ DebugLoc dl)
+{
+ SDValue Chain = LD->getChain();
+ EVT LoadedVT = LD->getMemoryVT();
+ SDValue Base = LD->getBasePtr();
+ SDValue Offset = LD->getOffset();
+ SDNode *OffsetNode = Offset.getNode();
+ // The increment must be a constant; cast<> asserts otherwise.
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+ SDValue N1 = LD->getOperand(1);
+ SDValue CPTmpN1_0;
+ SDValue CPTmpN1_1;
+ if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) &&
+ N1.getNode()->getValueType(0) == MVT::i32) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val)) {
+ // Auto-increment form: one load node yields the value, the updated
+ // address and the chain.
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::i32, MVT::Other, Base,
+ TargetConstVal, Chain);
+ // Zero word that is combined with the loaded value.
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32,
+ TargetConst0);
+ SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl,
+ MVT::i64, MVT::Other,
+ SDValue(Result_2,0),
+ SDValue(Result_1,0));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_3, 0),
+ SDValue(Result_1, 1),
+ SDValue(Result_1, 2)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_3;
+ }
+
+ // Generate an indirect load.
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::Other,
+ Base, TargetConst0, Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32,
+ TargetConst0);
+ SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl,
+ MVT::i64, MVT::Other,
+ SDValue(Result_2,0),
+ SDValue(Result_1,0));
+ // Add offset to base.
+ SDNode* Result_4 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
+ Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_3, 0), // Load value.
+ SDValue(Result_4, 0), // New address.
+ SDValue(Result_1, 1)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_3;
+ }
+
+ return SelectCode(LD);
+}
+
+
+//
+// Select an indexed load.  The increment must be a constant.  When it is a
+// valid auto-increment immediate for the loaded type, a POST_LD* instruction
+// is used; otherwise a plain load at offset 0 is emitted together with an
+// ADD_ri that computes the updated address.  Loads that extend to i64 are
+// delegated to SelectIndexedLoadZeroExtend64 / SelectIndexedLoadSignExtend64.
+//
+SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, DebugLoc dl) {
+  SDValue Chain = LD->getChain();
+  SDValue Base = LD->getBasePtr();
+  SDValue Offset = LD->getOffset();
+  SDNode *OffsetNode = Offset.getNode();
+  // Get the constant value.
+  int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+  EVT LoadedVT = LD->getMemoryVT();
+  unsigned Opcode = 0;
+
+  // Check for zero ext loads.
+  const bool zextval = (LD->getExtensionType() == ISD::ZEXTLOAD);
+  // Evaluate the auto-increment test once; every use below has the same
+  // arguments.
+  const bool IsAutoInc = TII->isValidAutoIncImm(LoadedVT, Val);
+
+  // Figure out the opcode.
+  if (LoadedVT == MVT::i64) {
+    Opcode = IsAutoInc ? Hexagon::POST_LDrid : Hexagon::LDrid;
+  } else if (LoadedVT == MVT::i32) {
+    Opcode = IsAutoInc ? Hexagon::POST_LDriw : Hexagon::LDriw;
+  } else if (LoadedVT == MVT::i16) {
+    if (IsAutoInc)
+      Opcode = zextval ? Hexagon::POST_LDriuh : Hexagon::POST_LDrih;
+    else
+      Opcode = zextval ? Hexagon::LDriuh : Hexagon::LDrih;
+  } else if (LoadedVT == MVT::i8) {
+    if (IsAutoInc)
+      Opcode = zextval ? Hexagon::POST_LDriub : Hexagon::POST_LDrib;
+    else
+      Opcode = zextval ? Hexagon::LDriub : Hexagon::LDrib;
+  } else
+    assert (0 && "unknown memory type");
+
+  if (LD->getValueType(0) == MVT::i64) {
+    // For zero ext i64 loads, we need to add combine instructions.
+    if (zextval)
+      return SelectIndexedLoadZeroExtend64(LD, Opcode, dl);
+    // Handle sign ext i64 loads.
+    if (LD->getExtensionType() == ISD::SEXTLOAD)
+      return SelectIndexedLoadSignExtend64(LD, Opcode, dl);
+  }
+
+  if (IsAutoInc) {
+    // Post-increment load: value, updated address and chain all come from
+    // the same machine node.
+    SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+    SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
+                                            LD->getValueType(0),
+                                            MVT::i32, MVT::Other, Base,
+                                            TargetConstVal, Chain);
+    MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+    MemOp[0] = LD->getMemOperand();
+    cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+    const SDValue Froms[] = { SDValue(LD, 0),
+                              SDValue(LD, 1),
+                              SDValue(LD, 2)
+    };
+    const SDValue Tos[]   = { SDValue(Result, 0),
+                              SDValue(Result, 1),
+                              SDValue(Result, 2)
+    };
+    ReplaceUses(Froms, Tos, 3);
+    return Result;
+  }
+
+  // The increment cannot be encoded: load at offset 0 and compute the updated
+  // address with a separate ADD_ri.
+  SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+  SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+  SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl,
+                                            LD->getValueType(0),
+                                            MVT::Other, Base, TargetConst0,
+                                            Chain);
+  SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
+                                            Base, TargetConstVal,
+                                            SDValue(Result_1, 1));
+  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+  MemOp[0] = LD->getMemOperand();
+  cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+  const SDValue Froms[] = { SDValue(LD, 0),
+                            SDValue(LD, 1),
+                            SDValue(LD, 2)
+  };
+  const SDValue Tos[]   = { SDValue(Result_1, 0),
+                            SDValue(Result_2, 0),
+                            SDValue(Result_1, 1)
+  };
+  ReplaceUses(Froms, Tos, 3);
+  return Result_1;
+}
+
+
+// Dispatch a load node: unindexed loads go to SelectBaseOffsetLoad, every
+// other addressing mode goes to SelectIndexedLoad.
+SDNode *HexagonDAGToDAGISel::SelectLoad(SDNode *N) {
+  DebugLoc Loc = N->getDebugLoc();
+  LoadSDNode *Load = cast<LoadSDNode>(N);
+
+  if (Load->getAddressingMode() == ISD::UNINDEXED)
+    return SelectBaseOffsetLoad(Load, Loc);
+
+  // Handle indexed loads.
+  return SelectIndexedLoad(Load, Loc);
+}
+
+
+//
+// Select an indexed store.  The increment must be a constant.  When it is a
+// valid auto-increment immediate for the stored type, a POST_ST* instruction
+// is emitted.  Otherwise a regular store at offset 0 is emitted, followed by
+// an ADD_ri that computes the updated address.
+//
+SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, DebugLoc dl) {
+ SDValue Chain = ST->getChain();
+ SDValue Base = ST->getBasePtr();
+ SDValue Offset = ST->getOffset();
+ SDValue Value = ST->getValue();
+ SDNode *OffsetNode = Offset.getNode();
+ // Get the constant value.
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+ EVT StoredVT = ST->getMemoryVT();
+
+ // Offset value must be within representable range
+ // and must have correct alignment properties.
+ if (TII->isValidAutoIncImm(StoredVT, Val)) {
+ SDValue Ops[] = { Value, Base,
+ CurDAG->getTargetConstant(Val, MVT::i32), Chain};
+ unsigned Opcode = 0;
+
+ // Figure out the post inc version of opcode.
+ if (StoredVT == MVT::i64) Opcode = Hexagon::POST_STdri;
+ else if (StoredVT == MVT::i32) Opcode = Hexagon::POST_STwri;
+ else if (StoredVT == MVT::i16) Opcode = Hexagon::POST_SThri;
+ else if (StoredVT == MVT::i8) Opcode = Hexagon::POST_STbri;
+ else assert (0 && "unknown memory type");
+
+ // Build post increment store.
+ SDNode* Result = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::Other, Ops, 4);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = ST->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+
+ // Result 0 is the updated address (i32), result 1 the chain.
+ ReplaceUses(ST, Result);
+ ReplaceUses(SDValue(ST,1), SDValue(Result,1));
+ return Result;
+ }
+
+ // Note: Order of operands matches the def of instruction:
+ // def STrid : STInst<(outs), (ins MEMri:$addr, DoubleRegs:$src1), ...
+ // and it differs for POST_ST* for instance.
+ SDValue Ops[] = { Base, CurDAG->getTargetConstant(0, MVT::i32), Value,
+ Chain};
+ unsigned Opcode = 0;
+
+ // Figure out the opcode.
+ if (StoredVT == MVT::i64) Opcode = Hexagon::STrid;
+ else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw;
+ else if (StoredVT == MVT::i16) Opcode = Hexagon::STrih;
+ else if (StoredVT == MVT::i8) Opcode = Hexagon::STrib;
+ else assert (0 && "unknown memory type");
+
+ // Build regular store.
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops,
+ 4);
+ // Build the separate increment instruction.
+ SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
+ Base,
+ TargetConstVal,
+ SDValue(Result_1, 0));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = ST->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+
+ // The updated address now comes from the ADD_ri, the chain from the store.
+ ReplaceUses(SDValue(ST,0), SDValue(Result_2,0));
+ ReplaceUses(SDValue(ST,1), SDValue(Result_1,0));
+ return Result_2;
+}
+
+
+//
+// Select an unindexed store.  A store to CONST32(TargetGlobalAddress) whose
+// offset is non-zero and accepted by OffsetFitsS11 becomes an ST*_indexed
+// instruction on CONST32_set of the global with offset 0.  Everything else
+// is handed to the generated matcher (SelectCode).
+//
+SDNode *HexagonDAGToDAGISel::SelectBaseOffsetStore(StoreSDNode *ST,
+ DebugLoc dl) {
+ SDValue Chain = ST->getChain();
+ SDNode* Const32 = ST->getBasePtr().getNode();
+ SDValue Value = ST->getValue();
+ unsigned Opcode = 0;
+
+ // Try to lower stores of GlobalAddresses into indexed stores. Custom
+ // lowering for GlobalAddress nodes has already turned it into a
+ // CONST32. Avoid truncating stores for the moment. Post-inc stores
+ // do the same. Don't think there's a reason for it, so will file a
+ // bug to fix.
+ if ((Const32->getOpcode() == HexagonISD::CONST32) &&
+ !(Value.getValueType() == MVT::i64 && ST->isTruncatingStore())) {
+ SDValue Base = Const32->getOperand(0);
+ if (Base.getOpcode() == ISD::TargetGlobalAddress) {
+ EVT StoredVT = ST->getMemoryVT();
+ int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset();
+ if (Offset != 0 && OffsetFitsS11(StoredVT, Offset)) {
+ MVT PointerTy = TLI.getPointerTy();
+ const GlobalValue* GV =
+ cast<GlobalAddressSDNode>(Base)->getGlobal();
+ // New base: the same global, with the offset stripped.
+ SDValue TargAddr =
+ CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0);
+ SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set,
+ dl, PointerTy,
+ TargAddr);
+
+ // Figure out base + offset opcode
+ if (StoredVT == MVT::i64) Opcode = Hexagon::STrid_indexed;
+ else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw_indexed;
+ else if (StoredVT == MVT::i16) Opcode = Hexagon::STrih_indexed;
+ else if (StoredVT == MVT::i8) Opcode = Hexagon::STrib_indexed;
+ else assert (0 && "unknown memory type");
+
+ SDValue Ops[] = {SDValue(NewBase,0),
+ CurDAG->getTargetConstant(Offset,PointerTy),
+ Value, Chain};
+ // build indexed store
+ SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
+ MVT::Other, Ops, 4);
+ // Carry the memory operand over to the new machine node.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = ST->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ ReplaceUses(ST, Result);
+ return Result;
+ }
+ }
+ }
+
+ return SelectCode(ST);
+}
+
+
+// Dispatch a store node: unindexed stores go to SelectBaseOffsetStore, every
+// other addressing mode goes to SelectIndexedStore.
+SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) {
+  DebugLoc Loc = N->getDebugLoc();
+  StoreSDNode *Store = cast<StoreSDNode>(N);
+  const bool IsIndexed = Store->getAddressingMode() != ISD::UNINDEXED;
+
+  return IsIndexed ? SelectIndexedStore(Store, Loc)
+                   : SelectBaseOffsetStore(Store, Loc);
+}
+
+//
+// Select an i64 multiply whose operands are both 32-bit values sign-extended
+// to 64 bits (through sign_extend or a sign-extending i32 load) as a single
+// MPY64 on the 32-bit values.  Every other multiply is handed to the
+// generated matcher (SelectCode).
+//
+SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+
+  //
+  // %conv.i = sext i32 %tmp1 to i64
+  // %conv2.i = sext i32 %add to i64
+  // %mul.i = mul nsw i64 %conv2.i, %conv.i
+  //
+  //   --- match with the following ---
+  //
+  // %mul.i = mpy (%tmp1, %add)
+  //
+
+  if (N->getValueType(0) == MVT::i64) {
+    // An i64 multiply; check whether both operands are sign-extended i32.
+    SDValue MulOp0 = N->getOperand(0);
+    SDValue MulOp1 = N->getOperand(1);
+
+    SDValue OP0;
+    SDValue OP1;
+
+    // Handle sign_extend and sextload.
+    if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) {
+      SDValue Sext0 = MulOp0.getOperand(0);
+      if (Sext0.getNode()->getValueType(0) != MVT::i32) {
+        // Not extended from i32: give up on the pattern.  The result of
+        // SelectCode must be returned, otherwise the code below would still
+        // build an MPY64 from this operand.
+        return SelectCode(N);
+      }
+
+      OP0 = Sext0;
+    } else if (MulOp0.getOpcode() == ISD::LOAD) {
+      LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode());
+      if (LD->getMemoryVT() != MVT::i32 ||
+          LD->getExtensionType() != ISD::SEXTLOAD ||
+          LD->getAddressingMode() != ISD::UNINDEXED) {
+        // Not a plain sign-extending i32 load: give up on the pattern.
+        return SelectCode(N);
+      }
+
+      // Re-emit the load as a 32-bit LDriw at offset 0.
+      SDValue Chain = LD->getChain();
+      SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+      OP0 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+                                            MVT::Other,
+                                            LD->getBasePtr(), TargetConst0,
+                                            Chain), 0);
+    } else {
+      return SelectCode(N);
+    }
+
+    // Same goes for the second operand.
+    if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) {
+      SDValue Sext1 = MulOp1.getOperand(0);
+      if (Sext1.getNode()->getValueType(0) != MVT::i32) {
+        return SelectCode(N);
+      }
+
+      OP1 = Sext1;
+    } else if (MulOp1.getOpcode() == ISD::LOAD) {
+      LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode());
+      if (LD->getMemoryVT() != MVT::i32 ||
+          LD->getExtensionType() != ISD::SEXTLOAD ||
+          LD->getAddressingMode() != ISD::UNINDEXED) {
+        return SelectCode(N);
+      }
+
+      SDValue Chain = LD->getChain();
+      SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+      OP1 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+                                            MVT::Other,
+                                            LD->getBasePtr(), TargetConst0,
+                                            Chain), 0);
+    } else {
+      return SelectCode(N);
+    }
+
+    // Generate a mpy instruction.
+    SDNode *Result = CurDAG->getMachineNode(Hexagon::MPY64, dl, MVT::i64,
+                                            OP0, OP1);
+    ReplaceUses(N, Result);
+    return Result;
+  }
+
+  return SelectCode(N);
+}
+
+
+//
+// Select a 'select' whose condition is a setcc comparing a 16-bit
+// sign_extend_inreg value against the select's true operand, and whose false
+// operand is the un-extended value.  SETLT is emitted as SXTH + MAXw_rr and
+// SETGT as SXTH + MINw_rr.  Everything else is handed to the generated
+// matcher (SelectCode).
+//
+SDNode *HexagonDAGToDAGISel::SelectSelect(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue N0 = N->getOperand(0);
+ if (N0.getOpcode() == ISD::SETCC) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ SDValue N000 = N00.getOperand(0);
+ SDValue N001 = N00.getOperand(1);
+ // Only a sign extension from 16 bits is handled.
+ if (cast<VTSDNode>(N001)->getVT() == MVT::i16) {
+ SDValue N01 = N0.getOperand(1);
+ SDValue N02 = N0.getOperand(2);
+
+ // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2,
+ // i16:Other),IntRegs:i32:$src1, SETLT:Other),IntRegs:i32:$src1,
+ // IntRegs:i32:$src2)
+ // Emits: (MAXh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2)
+ // Pattern complexity = 9 cost = 1 size = 0.
+ // NOTE(review): the code below builds SXTH on $src2 followed by MAXw_rr,
+ // not a MAXh_rr node as the pattern text above says.
+ if (cast<CondCodeSDNode>(N02)->get() == ISD::SETLT) {
+ SDValue N1 = N->getOperand(1);
+ if (N01 == N1) {
+ SDValue N2 = N->getOperand(2);
+ if (N000 == N2 &&
+ N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 &&
+ N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) {
+ SDNode *SextNode = CurDAG->getMachineNode(Hexagon::SXTH, dl,
+ MVT::i32, N000);
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::MAXw_rr, dl,
+ MVT::i32,
+ SDValue(SextNode, 0),
+ N1);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+ }
+
+ // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2,
+ // i16:Other), IntRegs:i32:$src1, SETGT:Other), IntRegs:i32:$src1,
+ // IntRegs:i32:$src2)
+ // Emits: (MINh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2)
+ // Pattern complexity = 9 cost = 1 size = 0.
+ // NOTE(review): the code below builds SXTH on $src2 followed by MINw_rr,
+ // not a MINh_rr node as the pattern text above says.
+ if (cast<CondCodeSDNode>(N02)->get() == ISD::SETGT) {
+ SDValue N1 = N->getOperand(1);
+ if (N01 == N1) {
+ SDValue N2 = N->getOperand(2);
+ if (N000 == N2 &&
+ N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 &&
+ N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) {
+ SDNode *SextNode = CurDAG->getMachineNode(Hexagon::SXTH, dl,
+ MVT::i32, N000);
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::MINw_rr, dl,
+ MVT::i32,
+ SDValue(SextNode, 0),
+ N1);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return SelectCode(N);
+}
+
+
+//
+// Select a truncate from i64 to i32 of (srl (mul a, b), 32), where a and b
+// are 32-bit values sign-extended to i64 (through sign_extend or a
+// sign-extending i32 load), as a single MPY on the 32-bit values.  Every
+// other truncate is handed to the generated matcher (SelectCode).
+//
+SDNode *HexagonDAGToDAGISel::SelectTruncate(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Shift = N->getOperand(0);
+
+ //
+ // %conv.i = sext i32 %tmp1 to i64
+ // %conv2.i = sext i32 %add to i64
+ // %mul.i = mul nsw i64 %conv2.i, %conv.i
+ // %shr5.i = lshr i64 %mul.i, 32
+ // %conv3.i = trunc i64 %shr5.i to i32
+ //
+ // --- match with the following ---
+ //
+ // %conv3.i = mpy (%tmp1, %add)
+ //
+ // Trunc to i32.
+ if (N->getValueType(0) == MVT::i32) {
+ // Trunc from i64.
+ if (Shift.getNode()->getValueType(0) == MVT::i64) {
+ // Trunc child is logical shift right.
+ if (Shift.getOpcode() != ISD::SRL) {
+ return SelectCode(N);
+ }
+
+ SDValue ShiftOp0 = Shift.getOperand(0);
+ SDValue ShiftOp1 = Shift.getOperand(1);
+
+ // Shift by const 32
+ if (ShiftOp1.getOpcode() != ISD::Constant) {
+ return SelectCode(N);
+ }
+
+ int32_t ShiftConst =
+ cast<ConstantSDNode>(ShiftOp1.getNode())->getSExtValue();
+ if (ShiftConst != 32) {
+ return SelectCode(N);
+ }
+
+ // Shifting a i64 signed multiply
+ SDValue Mul = ShiftOp0;
+ if (Mul.getOpcode() != ISD::MUL) {
+ return SelectCode(N);
+ }
+
+ SDValue MulOp0 = Mul.getOperand(0);
+ SDValue MulOp1 = Mul.getOperand(1);
+
+ // 32-bit operands that are fed to the MPY node below.
+ SDValue OP0;
+ SDValue OP1;
+
+ // Handle sign_extend and sextload
+ if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Sext0 = MulOp0.getOperand(0);
+ if (Sext0.getNode()->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+
+ OP0 = Sext0;
+ } else if (MulOp0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode());
+ if (LD->getMemoryVT() != MVT::i32 ||
+ LD->getExtensionType() != ISD::SEXTLOAD ||
+ LD->getAddressingMode() != ISD::UNINDEXED) {
+ return SelectCode(N);
+ }
+
+ // Re-emit the load as a 32-bit LDriw at offset 0.
+ // (Base is not used below; the base pointer is re-read from LD.)
+ SDValue Base = LD->getBasePtr();
+ SDValue Chain = LD->getChain();
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ OP0 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+ MVT::Other,
+ LD->getBasePtr(),
+ TargetConst0, Chain), 0);
+ } else {
+ return SelectCode(N);
+ }
+
+ // Same goes for the second operand.
+ if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Sext1 = MulOp1.getOperand(0);
+ if (Sext1.getNode()->getValueType(0) != MVT::i32)
+ return SelectCode(N);
+
+ OP1 = Sext1;
+ } else if (MulOp1.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode());
+ if (LD->getMemoryVT() != MVT::i32 ||
+ LD->getExtensionType() != ISD::SEXTLOAD ||
+ LD->getAddressingMode() != ISD::UNINDEXED) {
+ return SelectCode(N);
+ }
+
+ // (Base is not used below; the base pointer is re-read from LD.)
+ SDValue Base = LD->getBasePtr();
+ SDValue Chain = LD->getChain();
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ OP1 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+ MVT::Other,
+ LD->getBasePtr(),
+ TargetConst0, Chain), 0);
+ } else {
+ return SelectCode(N);
+ }
+
+ // Generate a mpy instruction.
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::MPY, dl, MVT::i32,
+ OP0, OP1);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+
+ return SelectCode(N);
+}
+
+
+//
+// Select an i32 shift-left by a constant when it can be folded into a
+// multiply by an immediate (MPYI_ri):
+//   (shl (mul x, C1), C2)          -> MPYI_ri x, (C1 << C2)
+//   (shl (sub 0, (shl x, C2)), C1) -> MPYI_ri x, -(1 << (C1 + C2))
+// The fold is only done when the combined constant passes isInt<9>.
+// Everything else is handed to the generated matcher (SelectCode).
+//
+SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getValueType(0) == MVT::i32) {
+ SDValue Shl_0 = N->getOperand(0);
+ SDValue Shl_1 = N->getOperand(1);
+ // RHS is const.
+ if (Shl_1.getOpcode() == ISD::Constant) {
+ if (Shl_0.getOpcode() == ISD::MUL) {
+ SDValue Mul_0 = Shl_0.getOperand(0); // Val
+ SDValue Mul_1 = Shl_0.getOperand(1); // Const
+ // RHS of mul is const.
+ if (Mul_1.getOpcode() == ISD::Constant) {
+ int32_t ShlConst =
+ cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue();
+ int32_t MulConst =
+ cast<ConstantSDNode>(Mul_1.getNode())->getSExtValue();
+ // NOTE(review): this is a 32-bit signed shift with no range check on
+ // ShlConst - confirm that out-of-range amounts cannot reach here.
+ int32_t ValConst = MulConst << ShlConst;
+ SDValue Val = CurDAG->getTargetConstant(ValConst,
+ MVT::i32);
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val.getNode()))
+ if (isInt<9>(CN->getSExtValue())) {
+ SDNode* Result =
+ CurDAG->getMachineNode(Hexagon::MPYI_ri, dl,
+ MVT::i32, Mul_0, Val);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+
+ }
+ } else if (Shl_0.getOpcode() == ISD::SUB) {
+ SDValue Sub_0 = Shl_0.getOperand(0); // Const 0
+ SDValue Sub_1 = Shl_0.getOperand(1); // Val
+ if (Sub_0.getOpcode() == ISD::Constant) {
+ int32_t SubConst =
+ cast<ConstantSDNode>(Sub_0.getNode())->getSExtValue();
+ // Only a negation (0 - x) is handled.
+ if (SubConst == 0) {
+ if (Sub_1.getOpcode() == ISD::SHL) {
+ SDValue Shl2_0 = Sub_1.getOperand(0); // Val
+ SDValue Shl2_1 = Sub_1.getOperand(1); // Const
+ if (Shl2_1.getOpcode() == ISD::Constant) {
+ int32_t ShlConst =
+ cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue();
+ int32_t Shl2Const =
+ cast<ConstantSDNode>(Shl2_1.getNode())->getSExtValue();
+ // NOTE(review): the combined shift amount is not range-checked
+ // before the 32-bit shift and negation - confirm it is bounded.
+ int32_t ValConst = 1 << (ShlConst+Shl2Const);
+ SDValue Val = CurDAG->getTargetConstant(-ValConst, MVT::i32);
+ if (ConstantSDNode *CN =
+ dyn_cast<ConstantSDNode>(Val.getNode()))
+ if (isInt<9>(CN->getSExtValue())) {
+ SDNode* Result =
+ CurDAG->getMachineNode(Hexagon::MPYI_ri, dl, MVT::i32,
+ Shl2_0, Val);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ return SelectCode(N);
+}
+
+
+//
+// If a zero_extend is fed directly by an intrinsic whose result is a
+// predicate, select the zero_extend as a predicate-to-register transfer.
+//
+// Zero extend -> transfer is lowered here. Otherwise, the zero_extend would be
+// converted into a MUX, because predicate registers are defined as 1 bit in
+// the compiler while the architecture defines them as 8-bit registers.
+// We want to preserve all of the lower 8 bits, not just the LSB.
+//
+SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  SDNode *Src = N->getOperand(0).getNode();
+
+  // Only zero_extends of chainless intrinsics are of interest.
+  if (Src->getOpcode() != ISD::INTRINSIC_WO_CHAIN)
+    return SelectCode(N);
+
+  // Operand 0 of the intrinsic node holds the intrinsic ID.
+  unsigned IntrinsicID =
+    cast<ConstantSDNode>(Src->getOperand(0))->getZExtValue();
+  if (!doesIntrinsicReturnPredicate(IntrinsicID))
+    return SelectCode(N);
+
+  // Now we need to differentiate target data types.
+  EVT ResultVT = N->getValueType(0);
+
+  if (ResultVT == MVT::i64) {
+    // Rs = Pd, then COMBINE_rr(0, Rs) to form the 64-bit value.
+    SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
+    SDNode *LowWord = CurDAG->getMachineNode(Hexagon::TFR_RsPd, dl,
+                                             MVT::i32,
+                                             SDValue(Src, 0));
+    SDNode *HighWord = CurDAG->getMachineNode(Hexagon::TFRI, dl,
+                                              MVT::i32,
+                                              Zero);
+    SDNode *Pair = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl,
+                                          MVT::i64, MVT::Other,
+                                          SDValue(HighWord, 0),
+                                          SDValue(LowWord, 0));
+    ReplaceUses(N, Pair);
+    return Pair;
+  }
+
+  if (ResultVT == MVT::i32) {
+    // A plain Rs = Pd is enough for a 32-bit result.
+    SDNode *Transfer = CurDAG->getMachineNode(Hexagon::TFR_RsPd, dl,
+                                              MVT::i32,
+                                              SDValue(Src, 0));
+    ReplaceUses(N, Transfer);
+    return Transfer;
+  }
+
+  // With asserts compiled out, an unexpected type falls back to SelectCode.
+  assert(0 && "Unexpected value type");
+  return SelectCode(N);
+}
+
+
+//
+// Select chainless intrinsics that take predicate registers as operand(s):
+// the node is rebuilt as the machine instruction reported by
+// doesIntrinsicContainPredicate, with each operand converted as needed.
+//
+SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  unsigned ID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+
+  // A zero result means this intrinsic has no predicate operands; those are
+  // left to SelectCode.
+  unsigned MachineOpc = doesIntrinsicContainPredicate(ID);
+  if (!MachineOpc)
+    return SelectCode(N);
+
+  SmallVector<SDValue, 8> NewOps;
+  const MCInstrDesc &Desc = TII->get(MachineOpc);
+  const TargetRegisterInfo *RegInfo = TM.getRegisterInfo();
+
+  // Walk the intrinsic's operands (operand 0 is the intrinsic ID):
+  //   - PredRegs operands get a register-to-predicate transfer,
+  //   - Int/Double register operands are passed through unchanged,
+  //   - immediates are turned into target constants.
+  for (unsigned Idx = 1, E = N->getNumOperands(); Idx < E; ++Idx) {
+    SDNode *Arg = N->getOperand(Idx).getNode();
+    const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, RegInfo);
+
+    if (RC == Hexagon::IntRegsRegisterClass ||
+        RC == Hexagon::DoubleRegsRegisterClass) {
+      NewOps.push_back(SDValue(Arg, 0));
+      continue;
+    }
+
+    if (RC == Hexagon::PredRegsRegisterClass) {
+      // Do the transfer.
+      SDNode *Pred = CurDAG->getMachineNode(Hexagon::TFR_PdRs, dl, MVT::i1,
+                                            SDValue(Arg, 0));
+      NewOps.push_back(SDValue(Pred, 0));
+      continue;
+    }
+
+    if (RC == NULL) {
+      // No register class: this must be an immediate, and we insist on
+      // having a constant node for it.
+      if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Arg)) {
+        int32_t Val = Imm->getSExtValue();
+        NewOps.push_back(CurDAG->getTargetConstant(Val, MVT::i32));
+        continue;
+      }
+    }
+
+    // With asserts compiled out the operand is silently dropped.
+    assert(0 && "Unimplemented");
+  }
+
+  EVT ReturnValueVT = N->getValueType(0);
+  SDNode *Result = CurDAG->getMachineNode(MachineOpc, dl,
+                                          ReturnValueVT,
+                                          NewOps.data(), NewOps.size());
+  ReplaceUses(N, Result);
+  return Result;
+}
+
+
+//
+// Map predicate true (an i1 constant encoded as -1 in LLVM) to a XOR of a
+// predicate with its own complement. All other constants go to SelectCode.
+//
+SDNode *HexagonDAGToDAGISel::SelectConstant(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+
+  if (N->getValueType(0) != MVT::i1)
+    return SelectCode(N);
+
+  int32_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+  if (Val != -1)
+    return SelectCode(N);
+
+  // NOTE(review): this virtual register and its register node are created
+  // but not used by any node built below.
+  unsigned NewIntReg = TM.getInstrInfo()->createVR(MF, MVT(MVT::i32));
+  SDValue Reg = CurDAG->getRegister(NewIntReg, MVT::i32);
+
+  // Rs = #0. The value transferred does not matter for the final result,
+  // because Pd is XORed with its own complement below.
+  SDNode *ZeroReg =
+    CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32,
+                           CurDAG->getTargetConstant(0, MVT::i32));
+
+  // Pd = Rs
+  SDNode *Pred = CurDAG->getMachineNode(Hexagon::TFR_PdRs, dl, MVT::i1,
+                                        SDValue(ZeroReg, 0));
+
+  // not(Pd)
+  SDNode *NotPred = CurDAG->getMachineNode(Hexagon::NOT_pp, dl, MVT::i1,
+                                           SDValue(Pred, 0));
+
+  // We have now built:
+  //   Rs = #0
+  //   Pd = Rs
+  //   result = xor(Pd, not(Pd))
+  SDNode *Result = CurDAG->getMachineNode(Hexagon::XOR_pp, dl, MVT::i1,
+                                          SDValue(Pred, 0),
+                                          SDValue(NotPred, 0));
+
+  ReplaceUses(N, Result);
+  return Result;
+}
+
+
+//
+// Map an add whose first operand is an asr into the accumulating form
+// (asr +=).
+//
+SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) {
+  if (N->getValueType(0) != MVT::i32)
+    return SelectCode(N);
+
+  // Identify nodes of the form add(asr(...), x): the shift must be a 32-bit
+  // SRA whose only user is this add.
+  SDNode *Shift = N->getOperand(0).getNode();
+  const bool IsFoldableAsr = Shift->getOpcode() == ISD::SRA &&
+                             Shift->hasOneUse() &&
+                             Shift->getValueType(0) == MVT::i32;
+  if (!IsFoldableAsr)
+    return SelectCode(N);
+
+  // Build Rd = Rd' + asr(Rs, Rt). The machine constraints will ensure that
+  // Rd and Rd' are assigned to the same register.
+  DebugLoc dl = N->getDebugLoc();
+  SDNode *Accumulate = CurDAG->getMachineNode(Hexagon::ASR_rr_acc, dl,
+                                              MVT::i32,
+                                              N->getOperand(1),
+                                              Shift->getOperand(0),
+                                              Shift->getOperand(1));
+  ReplaceUses(N, Accumulate);
+  return Accumulate;
+}
+
+
+// Entry point for selecting one node: opcodes with a hand-written selector
+// are dispatched to it, everything else is handed to SelectCode.
+SDNode *HexagonDAGToDAGISel::Select(SDNode *N) {
+  // Nodes that already carry a machine opcode have been selected.
+  if (N->isMachineOpcode())
+    return NULL;
+
+  switch (N->getOpcode()) {
+  case ISD::Constant:           return SelectConstant(N);
+  case ISD::ADD:                return SelectAdd(N);
+  case ISD::SHL:                return SelectSHL(N);
+  case ISD::LOAD:               return SelectLoad(N);
+  case ISD::STORE:              return SelectStore(N);
+  case ISD::SELECT:             return SelectSelect(N);
+  case ISD::TRUNCATE:           return SelectTruncate(N);
+  case ISD::MUL:                return SelectMul(N);
+  case ISD::ZERO_EXTEND:        return SelectZeroExtend(N);
+  case ISD::INTRINSIC_WO_CHAIN: return SelectIntrinsicWOChain(N);
+  default:
+    break;
+  }
+
+  return SelectCode(N);
+}
+
+
+//
+// Hexagon_TODO: Five functions for ADDRri?! Surely there must be a better way
+// to define these instructions.
+//
+// Match an address as Base + 0. Target external symbols and target global
+// addresses are rejected (direct calls); a frame index is rewritten to a
+// target frame index; any other address is used as the base unchanged.
+//
+bool HexagonDAGToDAGISel::SelectADDRri(SDValue& Addr, SDValue &Base,
+                                       SDValue &Offset) {
+  const unsigned Opc = Addr.getOpcode();
+  if (Opc == ISD::TargetExternalSymbol || Opc == ISD::TargetGlobalAddress)
+    return false;  // Direct calls.
+
+  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Addr))
+    Base = CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32);
+  else
+    Base = Addr;
+
+  Offset = CurDAG->getTargetConstant(0, MVT::i32);
+  return true;
+}
+
+
+// Match an address as Base + 0, accepting it only if the (always zero)
+// offset passes IsS11_0_Offset. Target external symbols and target global
+// addresses are rejected; a frame index becomes a target frame index.
+bool HexagonDAGToDAGISel::SelectADDRriS11_0(SDValue& Addr, SDValue &Base,
+                                            SDValue &Offset) {
+  const unsigned Opc = Addr.getOpcode();
+  if (Opc == ISD::TargetExternalSymbol || Opc == ISD::TargetGlobalAddress)
+    return false;  // Direct calls.
+
+  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Addr))
+    Base = CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32);
+  else
+    Base = Addr;
+
+  Offset = CurDAG->getTargetConstant(0, MVT::i32);
+  return IsS11_0_Offset(Offset.getNode());
+}
+
+
+// Match an address as Base + 0, accepting it only if the (always zero)
+// offset passes IsS11_1_Offset. Target external symbols and target global
+// addresses are rejected; a frame index becomes a target frame index.
+bool HexagonDAGToDAGISel::SelectADDRriS11_1(SDValue& Addr, SDValue &Base,
+                                            SDValue &Offset) {
+  const unsigned Opc = Addr.getOpcode();
+  if (Opc == ISD::TargetExternalSymbol || Opc == ISD::TargetGlobalAddress)
+    return false;  // Direct calls.
+
+  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Addr))
+    Base = CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32);
+  else
+    Base = Addr;
+
+  Offset = CurDAG->getTargetConstant(0, MVT::i32);
+  return IsS11_1_Offset(Offset.getNode());
+}
+
+
+// Match an address as Base + 0, accepting it only if the (always zero)
+// offset passes IsS11_2_Offset. Target external symbols and target global
+// addresses are rejected; a frame index becomes a target frame index.
+bool HexagonDAGToDAGISel::SelectADDRriS11_2(SDValue& Addr, SDValue &Base,
+                                            SDValue &Offset) {
+  const unsigned Opc = Addr.getOpcode();
+  if (Opc == ISD::TargetExternalSymbol || Opc == ISD::TargetGlobalAddress)
+    return false;  // Direct calls.
+
+  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Addr))
+    Base = CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32);
+  else
+    Base = Addr;
+
+  Offset = CurDAG->getTargetConstant(0, MVT::i32);
+  return IsS11_2_Offset(Offset.getNode());
+}
+
+
+// Match an address as Base + 0, accepting it only if the (always zero)
+// offset passes IsU6_0_Offset. Target external symbols and target global
+// addresses are rejected; a frame index becomes a target frame index.
+bool HexagonDAGToDAGISel::SelectADDRriU6_0(SDValue& Addr, SDValue &Base,
+                                           SDValue &Offset) {
+  const unsigned Opc = Addr.getOpcode();
+  if (Opc == ISD::TargetExternalSymbol || Opc == ISD::TargetGlobalAddress)
+    return false;  // Direct calls.
+
+  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Addr))
+    Base = CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32);
+  else
+    Base = Addr;
+
+  Offset = CurDAG->getTargetConstant(0, MVT::i32);
+  return IsU6_0_Offset(Offset.getNode());
+}
+
+
+// Match an address as Base + 0, accepting it only if the (always zero)
+// offset passes IsU6_1_Offset. Target external symbols and target global
+// addresses are rejected; a frame index becomes a target frame index.
+bool HexagonDAGToDAGISel::SelectADDRriU6_1(SDValue& Addr, SDValue &Base,
+                                           SDValue &Offset) {
+  const unsigned Opc = Addr.getOpcode();
+  if (Opc == ISD::TargetExternalSymbol || Opc == ISD::TargetGlobalAddress)
+    return false;  // Direct calls.
+
+  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Addr))
+    Base = CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32);
+  else
+    Base = Addr;
+
+  Offset = CurDAG->getTargetConstant(0, MVT::i32);
+  return IsU6_1_Offset(Offset.getNode());
+}
+
+
+// Match an address as Base + 0, accepting it only if the (always zero)
+// offset passes IsU6_2_Offset. Target external symbols and target global
+// addresses are rejected; a frame index becomes a target frame index.
+bool HexagonDAGToDAGISel::SelectADDRriU6_2(SDValue& Addr, SDValue &Base,
+                                           SDValue &Offset) {
+  const unsigned Opc = Addr.getOpcode();
+  if (Opc == ISD::TargetExternalSymbol || Opc == ISD::TargetGlobalAddress)
+    return false;  // Direct calls.
+
+  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Addr))
+    Base = CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32);
+  else
+    Base = Addr;
+
+  Offset = CurDAG->getTargetConstant(0, MVT::i32);
+  return IsU6_2_Offset(Offset.getNode());
+}
+
+
+// Memory-operand variant of the S11_2 matcher. ISD::ADD addresses receive no
+// special treatment: every address form is forwarded to SelectADDRriS11_2.
+bool HexagonDAGToDAGISel::SelectMEMriS11_2(SDValue& Addr, SDValue &Base,
+                                           SDValue &Offset) {
+  return SelectADDRriS11_2(Addr, Base, Offset);
+}
+
+
+// Match an address as Base + 0, accepting it only if the (always zero)
+// offset passes IsS11_3_Offset. Target external symbols and target global
+// addresses are rejected; a frame index becomes a target frame index.
+bool HexagonDAGToDAGISel::SelectADDRriS11_3(SDValue& Addr, SDValue &Base,
+                                            SDValue &Offset) {
+  const unsigned Opc = Addr.getOpcode();
+  if (Opc == ISD::TargetExternalSymbol || Opc == ISD::TargetGlobalAddress)
+    return false;  // Direct calls.
+
+  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Addr))
+    Base = CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32);
+  else
+    Base = Addr;
+
+  Offset = CurDAG->getTargetConstant(0, MVT::i32);
+  return IsS11_3_Offset(Offset.getNode());
+}
+
+// Match a register + register address. Frame indices, target external
+// symbols and target global addresses are rejected, as is an ADD whose
+// second operand is a constant that fits in 13 signed bits (that form is
+// left to the reg+imm pattern).
+//
+// NOTE(review): when Addr is not an ISD::ADD only R1 is assigned; R2 is left
+// as the caller passed it in even though true is returned. Confirm that
+// callers cope with that.
+bool HexagonDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1,
+                                       SDValue &R2) {
+  switch (Addr.getOpcode()) {
+  case ISD::FrameIndex:
+    return false;
+
+  case ISD::TargetExternalSymbol:
+  case ISD::TargetGlobalAddress:
+    return false;  // Direct calls.
+
+  case ISD::ADD: {
+    ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+    if (Imm && isInt<13>(Imm->getSExtValue()))
+      return false;  // Let the reg+imm pattern catch this!
+    R1 = Addr.getOperand(0);
+    R2 = Addr.getOperand(1);
+    return true;
+  }
+
+  default:
+    break;
+  }
+
+  R1 = Addr;
+  return true;
+}
+
+
+// Handle the generic address case. It is reached from inline asm =m
+// constraints, which could have any kind of pointer. The address is split
+// into Base and Offset as follows:
+//   frame index   -> target frame index + 0
+//   add(a, b)     -> a + b
+//   anything else -> Addr + 0
+// Target external symbols and target global addresses are rejected.
+bool HexagonDAGToDAGISel::SelectAddr(SDNode *Op, SDValue Addr,
+                                     SDValue &Base, SDValue &Offset) {
+  const unsigned Opc = Addr.getOpcode();
+  if (Opc == ISD::TargetExternalSymbol || Opc == ISD::TargetGlobalAddress)
+    return false;  // Direct calls.
+
+  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Addr)) {
+    Base = CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32);
+    Offset = CurDAG->getTargetConstant(0, MVT::i32);
+  } else if (Opc == ISD::ADD) {
+    Base = Addr.getOperand(0);
+    Offset = Addr.getOperand(1);
+  } else {
+    Base = Addr;
+    Offset = CurDAG->getTargetConstant(0, MVT::i32);
+  }
+  return true;
+}
+
+
+// Select the operands for an inline asm memory constraint. Only the 'm'
+// (memory) constraint is handled; 'o' (offsetable), 'v' (not offsetable) and
+// any other code are refused. On success the base and offset are appended to
+// OutOps and false is returned; true is returned on failure.
+bool HexagonDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+                             std::vector<SDValue> &OutOps) {
+  if (ConstraintCode != 'm')
+    return true;
+
+  SDValue Base, Offset;
+  if (!SelectAddr(Op.getNode(), Op, Base, Offset))
+    return true;
+
+  OutOps.push_back(Base);
+  OutOps.push_back(Offset);
+  return false;
+}
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
new file mode 100644
index 0000000..0ac3cf0
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -0,0 +1,1505 @@
+//===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that Hexagon uses to lower LLVM code
+// into a selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonISelLowering.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonTargetObjectFile.h"
+#include "HexagonSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "llvm/Support/CommandLine.h"
+
+// NOTE(review): the unit of this limit (bits vs. bytes) is not evident from
+// the code reviewed here -- confirm at the point of use.
+const unsigned Hexagon_MAX_RET_SIZE = 64;
+using namespace llvm;
+
+// Hidden command-line switch "hexagon-emit-jump-tables"; defaults to true.
+static cl::opt<bool>
+EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden,
+ cl::desc("Control jump table emission on Hexagon target"));
+
+// Number of named (fixed) parameters of the callee currently being lowered.
+// HexagonTargetLowering::LowerCall resets it to -1 and then sets it to the
+// callee's parameter count when the callee is a varargs function declared
+// with at least one parameter; CC_Hexagon_VarArg reads it to tell named from
+// unnamed arguments. It is mutable file-scope state with external linkage.
+int NumNamedVarArgParams = -1;
+
+// Implement calling convention for Hexagon.
+//
+// Forward declarations of the assignment functions defined further down.
+// Each one returns false once it has recorded a location for the value in
+// State, and true when the convention did not match.
+
+// Argument convention: dispatches on LocVT to CC_Hexagon32 / CC_Hexagon64.
+static bool
+CC_Hexagon(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+// Assigns one 32-bit argument.
+static bool
+CC_Hexagon32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+// Assigns one 64-bit argument.
+static bool
+CC_Hexagon64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+// Return-value convention: dispatches on LocVT to RetCC_Hexagon32 /
+// RetCC_Hexagon64.
+static bool
+RetCC_Hexagon(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+// Assigns one 32-bit return value.
+static bool
+RetCC_Hexagon32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+// Assigns one 64-bit return value.
+static bool
+RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+// Argument convention for calls to varargs functions. The first
+// NumNamedVarArgParams values are the named arguments and follow CC_Hexagon;
+// every later (unnamed) value is placed on the stack. Returns false once a
+// location has been recorded.
+static bool
+CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
+                   MVT LocVT, CCValAssign::LocInfo LocInfo,
+                   ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+  // NumNamedVarArgParams can not be zero for a VarArg function.
+  assert ( (NumNamedVarArgParams > 0) &&
+           "NumNamedVarArgParams is not bigger than zero.");
+
+  // Named arguments use the regular convention.
+  if ( static_cast<int>(ValNo) < NumNamedVarArgParams )
+    return CC_Hexagon(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State);
+
+  // Un-named arguments: work out the size and alignment of the stack slot.
+  unsigned SlotSize;
+  unsigned SlotAlign;
+  if (ArgFlags.isByVal()) {
+    // For pass-by-value the slot size comes from ArgFlags.getByValSize(),
+    // not from the size of LocVT.
+    assert ((ArgFlags.getByValSize() > 8) &&
+            "ByValSize must be bigger than 8 bytes");
+    SlotSize = ArgFlags.getByValSize();
+    SlotAlign = 4;
+  } else if (LocVT == MVT::i32) {
+    SlotSize = 4;
+    SlotAlign = 4;
+  } else if (LocVT == MVT::i64) {
+    SlotSize = 8;
+    SlotAlign = 8;
+  } else {
+    llvm_unreachable(0);
+
+    return true;
+  }
+
+  unsigned StackOffset = State.AllocateStack(SlotSize, SlotAlign);
+  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT,
+                                   LocInfo));
+  return false;
+}
+
+
+// Argument convention: byval aggregates go on the stack, i1/i8/i16 are
+// promoted to i32, and i32 / i64 values are delegated to CC_Hexagon32 /
+// CC_Hexagon64. Returns true when no rule matched.
+static bool
+CC_Hexagon (unsigned ValNo, MVT ValVT,
+            MVT LocVT, CCValAssign::LocInfo LocInfo,
+            ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+  if (ArgFlags.isByVal()) {
+    // Passed on stack.
+    assert ((ArgFlags.getByValSize() > 8) &&
+            "ByValSize must be bigger than 8 bytes");
+    unsigned StackOffset = State.AllocateStack(ArgFlags.getByValSize(), 4);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT,
+                                     LocInfo));
+    return false;
+  }
+
+  // Promote small integers to i32, extending as the argument flags ask.
+  const bool IsSmallInt =
+    LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16;
+  if (IsSmallInt) {
+    LocVT = MVT::i32;
+    ValVT = MVT::i32;
+    LocInfo = ArgFlags.isSExt() ? CCValAssign::SExt
+            : ArgFlags.isZExt() ? CCValAssign::ZExt
+                                : CCValAssign::AExt;
+  }
+
+  if (LocVT == MVT::i32)
+    return CC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State);
+
+  if (LocVT == MVT::i64)
+    return CC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State);
+
+  return true;  // CC didn't match.
+}
+
+
+// Assign a 32-bit argument to the next free register of R0-R5, or to a
+// 4-byte, 4-aligned stack slot once those are used up. Always returns false.
+static bool CC_Hexagon32(unsigned ValNo, MVT ValVT,
+                         MVT LocVT, CCValAssign::LocInfo LocInfo,
+                         ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+  static const unsigned ArgRegs[] = {
+    Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+    Hexagon::R5
+  };
+
+  unsigned Reg = State.AllocateReg(ArgRegs, 6);
+  if (Reg) {
+    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+  } else {
+    unsigned StackOffset = State.AllocateStack(4, 4);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT,
+                                     LocInfo));
+  }
+  return false;
+}
+
+// Assign a 64-bit argument: D0 first, then D1/D2 (allocated together with
+// the shadow registers R1/R3), and finally an 8-byte, 8-aligned stack slot.
+// Always returns false.
+static bool CC_Hexagon64(unsigned ValNo, MVT ValVT,
+                         MVT LocVT, CCValAssign::LocInfo LocInfo,
+                         ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+  static const unsigned PairRegs[] = {
+    Hexagon::D1, Hexagon::D2
+  };
+  static const unsigned ShadowRegs[] = {
+    Hexagon::R1, Hexagon::R3
+  };
+
+  unsigned Reg = State.AllocateReg(Hexagon::D0);
+  if (!Reg)
+    Reg = State.AllocateReg(PairRegs, ShadowRegs, 2);
+
+  if (Reg) {
+    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+    return false;
+  }
+
+  unsigned StackOffset = State.AllocateStack(8, 8, Hexagon::D2);
+  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT,
+                                   LocInfo));
+  return false;
+}
+
+// Return-value convention: i1/i8/i16 are promoted to i32, then i32 / i64
+// values are delegated to RetCC_Hexagon32 / RetCC_Hexagon64. Returns true
+// when no rule matched.
+static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
+                          MVT LocVT, CCValAssign::LocInfo LocInfo,
+                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+  // Promote small integers to i32, extending as the flags ask.
+  const bool IsSmallInt =
+    LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16;
+  if (IsSmallInt) {
+    LocVT = MVT::i32;
+    ValVT = MVT::i32;
+    LocInfo = ArgFlags.isSExt() ? CCValAssign::SExt
+            : ArgFlags.isZExt() ? CCValAssign::ZExt
+                                : CCValAssign::AExt;
+  }
+
+  if (LocVT == MVT::i32)
+    return RetCC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State);
+
+  if (LocVT == MVT::i64)
+    return RetCC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State);
+
+  return true;  // CC didn't match.
+}
+
+// Assign a 32-bit return value to R0 when it is an i32 and R0 is still free;
+// otherwise give it a 4-byte, 4-aligned stack slot. Always returns false.
+static bool RetCC_Hexagon32(unsigned ValNo, MVT ValVT,
+                            MVT LocVT, CCValAssign::LocInfo LocInfo,
+                            ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+  // R0 is only tried for i32 locations.
+  unsigned Reg = 0;
+  if (LocVT == MVT::i32)
+    Reg = State.AllocateReg(Hexagon::R0);
+
+  if (Reg) {
+    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+  } else {
+    unsigned StackOffset = State.AllocateStack(4, 4);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT,
+                                     LocInfo));
+  }
+  return false;
+}
+
+// Assign a 64-bit return value to D0 when it is an i64 and D0 is still free;
+// otherwise give it an 8-byte, 8-aligned stack slot. Always returns false.
+static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
+                            MVT LocVT, CCValAssign::LocInfo LocInfo,
+                            ISD::ArgFlagsTy ArgFlags, CCState &State) {
+  // D0 is only tried for i64 locations.
+  unsigned Reg = 0;
+  if (LocVT == MVT::i64)
+    Reg = State.AllocateReg(Hexagon::D0);
+
+  if (Reg) {
+    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+  } else {
+    unsigned StackOffset = State.AllocateStack(8, 8);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT,
+                                     LocInfo));
+  }
+  return false;
+}
+
+// No custom lowering is performed for chainless intrinsics: an empty SDValue
+// is returned for every input.
+SDValue
+HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  SDValue NoCustomLowering;
+  return NoCustomLowering;
+}
+
+/// CreateCopyOfByValArgument - Emit a memcpy that copies the aggregate at
+/// address "Src" to address "Dst". The number of bytes and the alignment are
+/// taken from the byval size and byval alignment recorded in "Flags". The
+/// copy will be passed as a byval function parameter. Sometimes what we are
+/// copying is the end of a larger object, the part that does not fit in
+/// registers. Returns the memcpy node produced by the DAG.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
+                          DebugLoc dl) {
+
+  SDValue NumBytes = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+  const unsigned Alignment = Flags.getByValAlign();
+
+  return DAG.getMemcpy(Chain, dl, Dst, Src, NumBytes, Alignment,
+                       /*isVolatile=*/false, /*AlwaysInline=*/false,
+                       MachinePointerInfo(), MachinePointerInfo());
+}
+
+
+// LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
+// passed by value, the function prototype is modified to return void and
+// the value is stored in memory pointed by a pointer passed by caller.
+//
+// Each return value is copied into the register chosen by RetCC_Hexagon, the
+// copies are glued together, and a HexagonISD::RET_FLAG node is returned.
+SDValue
+HexagonTargetLowering::LowerReturn(SDValue Chain,
+                                   CallingConv::ID CallConv, bool isVarArg,
+                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                   const SmallVectorImpl<SDValue> &OutVals,
+                                   DebugLoc dl, SelectionDAG &DAG) const {
+
+  MachineFunction &MF = DAG.getMachineFunction();
+
+  // RVLocs receives the location assigned to each return value; CCInfo
+  // tracks the registers and stack slots handed out so far.
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState CCInfo(CallConv, isVarArg, MF,
+                 getTargetMachine(), RVLocs, *DAG.getContext());
+
+  // Analyze return values of ISD::RET
+  CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);
+
+  // NOTE(review): this stack-register node is created but not used below.
+  SDValue StackPtr = DAG.getRegister(TM.getRegisterInfo()->getStackRegister(),
+                                     MVT::i32);
+
+  // If this is the first return lowered for this function, add the regs to
+  // the liveout set for the function.
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  if (MRI.liveout_empty()) {
+    for (unsigned Idx = 0, E = RVLocs.size(); Idx != E; ++Idx) {
+      if (RVLocs[Idx].isRegLoc())
+        MRI.addLiveOut(RVLocs[Idx].getLocReg());
+    }
+  }
+
+  // Copy the result values into the output registers, threading the glue
+  // value through so that all emitted copies stay stuck together.
+  SDValue Glue;
+  for (unsigned Idx = 0, E = RVLocs.size(); Idx != E; ++Idx) {
+    CCValAssign &VA = RVLocs[Idx];
+    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[Idx], Glue);
+    Glue = Chain.getValue(1);
+  }
+
+  // Only pass the glue operand when at least one copy was emitted.
+  if (Glue.getNode())
+    return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, Chain, Glue);
+
+  return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+
+
+
+/// LowerCallResult - Lower the result values of an ISD::CALL into the
+/// appropriate copies out of appropriate physical registers. Chain/InFlag
+/// are the input chain/flag to use. One value per location assigned by
+/// RetCC_Hexagon is appended to InVals, and the updated chain is returned.
+SDValue
+HexagonTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+                                       CallingConv::ID CallConv, bool isVarArg,
+                                       const
+                                       SmallVectorImpl<ISD::InputArg> &Ins,
+                                       DebugLoc dl, SelectionDAG &DAG,
+                                       SmallVectorImpl<SDValue> &InVals,
+                                       const SmallVectorImpl<SDValue> &OutVals,
+                                       SDValue Callee) const {
+
+  // Assign locations to each value returned by this call.
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+                 getTargetMachine(), RVLocs, *DAG.getContext());
+  CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);
+
+  // Copy all of the result registers out of their specified physreg. Each
+  // copy yields the value (result 0), the chain (result 1) and the flag
+  // (result 2) that feeds the next copy.
+  for (unsigned Idx = 0, E = RVLocs.size(); Idx != E; ++Idx) {
+    CCValAssign &VA = RVLocs[Idx];
+    SDValue Copy = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+                                      VA.getValVT(), InFlag);
+    Chain = Copy.getValue(1);
+    InFlag = Copy.getValue(2);
+    InVals.push_back(Copy.getValue(0));
+  }
+
+  return Chain;
+}
+
+/// LowerCall - Function arguments are copied from virtual regs to
+/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
+///
+/// Outline of the lowering:
+///  1. Assign a location to every outgoing argument (CC_Hexagon, or
+///     CC_Hexagon_VarArg when the callee is a known varargs function).
+///  2. Decide whether a requested tail call is still possible; isTailCall is
+///     an in/out parameter and is cleared when it is not.
+///  3. Emit stores/memcpys for stack arguments and register copies for
+///     register arguments.
+///  4. Emit either a HexagonISD::TC_RETURN node (tail call), or a
+///     HexagonISD::CALL wrapped in CALLSEQ_START/CALLSEQ_END followed by the
+///     result copies produced by LowerCallResult.
+SDValue
+HexagonTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // The call returns a struct via sret when its first outgoing argument
+ // carries the sret flag.
+ bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // Check for varargs.
+ // NumNamedVarArgParams is a file-scope variable that CC_Hexagon_VarArg
+ // reads; -1 means "not a known varargs callee".
+ NumNamedVarArgParams = -1;
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Callee))
+ {
+ const Function* CalleeFn = NULL;
+ Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, MVT::i32);
+ if ((CalleeFn = dyn_cast<Function>(GA->getGlobal())))
+ {
+ // If a function has zero args and is a vararg function, that's
+ // disallowed so it must be an undeclared function. Do not assume
+ // varargs if the callee is undefined.
+ if (CalleeFn->isVarArg() &&
+ CalleeFn->getFunctionType()->getNumParams() != 0) {
+ NumNamedVarArgParams = CalleeFn->getFunctionType()->getNumParams();
+ }
+ }
+ }
+
+ // Pick the assignment function based on what was found above.
+ if (NumNamedVarArgParams > 0)
+ CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_VarArg);
+ else
+ CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);
+
+
+ if(isTailCall) {
+ bool StructAttrFlag =
+ DAG.getMachineFunction().getFunction()->hasStructRetAttr();
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ isVarArg, IsStructRet,
+ StructAttrFlag,
+ Outs, OutVals, Ins, DAG);
+ // Any argument that was assigned a stack location rules out the tail
+ // call, whatever the eligibility check said.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i){
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isMemLoc()) {
+ isTailCall = false;
+ break;
+ }
+ }
+ if (isTailCall) {
+ DEBUG(dbgs () << "Eligible for Tail Call\n");
+ } else {
+ DEBUG(dbgs () <<
+ "Argument must be passed on stack. Not eligible for Tail Call\n");
+ }
+ }
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+ SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ SDValue StackPtr =
+ DAG.getCopyFromReg(Chain, dl, TM.getRegisterInfo()->getStackRegister(),
+ getPointerTy());
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default:
+ // Loc info must be one of Full, SExt, ZExt, or AExt.
+ // When asserts are compiled out, control falls through to the Full case
+ // and the value is passed unchanged.
+ assert(0 && "Unknown loc info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isMemLoc()) {
+ // Address of the outgoing slot: stack pointer + assigned offset.
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getConstant(LocMemOffset, StackPtr.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+
+ if (Flags.isByVal()) {
+ // The argument is a struct passed by value. According to LLVM, "Arg"
+ // is a pointer.
+ MemOpChains.push_back(CreateCopyOfByValArgument(Arg, PtrOff, Chain,
+ Flags, DAG, dl));
+ } else {
+ // The argument is not passed by value. "Arg" is a built-in type. It is
+ // not a pointer.
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),false, false,
+ 0));
+ }
+ continue;
+ }
+
+ // Arguments that can be passed in a register must be kept in the
+ // RegsToPass vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ }
+ }
+
+ // Transform all store nodes into one single node because all store
+ // nodes are independent of each other.
+ if (!MemOpChains.empty()) {
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0],
+ MemOpChains.size());
+ }
+
+ // A tail call gets no CALLSEQ_START (and, further down, no CALLSEQ_END).
+ if (!isTailCall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
+ getPointerTy(), true));
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag is necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+ if (!isTailCall) {
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ }
+
+ // For tail calls lower the arguments to the 'real' stack slot.
+ if (isTailCall) {
+ // Force all the incoming stack arguments to be loaded from the stack
+ // before any new outgoing arguments are stored to the stack, because the
+ // outgoing stack slots may alias the incoming argument stack slots, and
+ // the alias isn't otherwise explicit. This is slightly more conservative
+ // than necessary, because it means that each store effectively depends
+ // on every argument instead of just those arguments it would clobber.
+ //
+ // NOTE(review): no such loads are emitted in this block; the code below
+ // only performs the register copies. Confirm whether the paragraph above
+ // still describes this function.
+ //
+ // Do not flag preceding copytoreg stuff together with the following stuff.
+ InFlag = SDValue();
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ // The flag is dropped again, so the tail-call node built below does not
+ // receive a flag operand.
+ InFlag =SDValue();
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ // NOTE(review): flag_aligned_memcpy is declared outside the code reviewed
+ // here; when set, the callee is replaced by the aligned-memcpy routine named
+ // below and the flag is cleared again.
+ if (flag_aligned_memcpy) {
+ const char *MemcpyName =
+ "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes";
+ Callee =
+ DAG.getTargetExternalSymbol(MemcpyName, getPointerTy());
+ flag_aligned_memcpy = false;
+ } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
+ } else if (ExternalSymbolSDNode *S =
+ dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+ }
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+ }
+
+ // The flag operand, when present, goes last.
+ if (InFlag.getNode()) {
+ Ops.push_back(InFlag);
+ }
+
+ // A tail call ends here; no result copies are emitted for it.
+ if (isTailCall)
+ return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+
+ Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+ InVals, OutVals, Callee);
+}
+
+ /// getIndexedAddressParts - If Ptr is an ISD::ADD and VT is one of
+ /// i8/i16/i32/i64, store the two ADD operands in Base and Offset, set isInc,
+ /// and return true when Offset is a constant node. Otherwise return false.
+ /// isSEXTLoad and DAG are not consulted here.
+ static bool getIndexedAddressParts(SDNode *Ptr, EVT VT,
+                                    bool isSEXTLoad, SDValue &Base,
+                                    SDValue &Offset, bool &isInc,
+                                    SelectionDAG &DAG) {
+   // Only a "base + offset" computation can be split.
+   const bool IsAdd = (Ptr->getOpcode() == ISD::ADD);
+   if (!IsAdd)
+     return false;
+
+   // Restrict to the scalar integer memory types.
+   const bool IsScalarInt = (VT == MVT::i8 || VT == MVT::i16 ||
+                             VT == MVT::i32 || VT == MVT::i64);
+   if (!IsScalarInt)
+     return false;
+
+   isInc = IsAdd;
+   Base = Ptr->getOperand(0);
+   Offset = Ptr->getOperand(1);
+
+   // The split is only usable when the offset is a constant.
+   return isa<ConstantSDNode>(Offset);
+ }
+
+ // TODO: Put this function along with the other isS* functions in
+ // HexagonISelDAGToDAG.cpp into a common file. Or better still, use the
+ // functions defined in HexagonImmediates.td.
+ //
+ // Is_PostInc_S4_Offset - Return true if the constant held by S is a
+ // multiple of (1 << ShiftAmount) and, once shifted right by ShiftAmount,
+ // fits in a 4-bit sign-extended field (i.e. lies in [-8, 7]).
+ // S must be a ConstantSDNode.
+ static bool Is_PostInc_S4_Offset(SDNode * S, int ShiftAmount) {
+   ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+   int64_t v = (int64_t)N->getSExtValue();
+   if (ShiftAmount > 0) {
+     // The low ShiftAmount bits are not part of the scaled immediate, so
+     // they must all be zero. (The previous "v % ShiftAmount" test checked
+     // divisibility by the shift count rather than by the scale.)
+     const int64_t LowBitsMask = ((int64_t)1 << ShiftAmount) - 1;
+     if ((v & LowBitsMask) != 0)
+       return false;
+     v = v >> ShiftAmount;
+   }
+   // immS4 predicate - true if the scaled value fits in 4 signed bits.
+   return (v <= 7) && (v >= -8);
+ }
+
+ /// getPostIndexedAddressParts - returns true by value, base pointer and
+ /// offset pointer and addressing mode by reference if this node can be
+ /// combined with a load / store to form a post-indexed load / store.
+ ///
+ /// N is the load or store, Op is the node computing the updated address.
+ /// Truncating stores of an i64 value are rejected.
+ bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+                                                        SDValue &Base,
+                                                        SDValue &Offset,
+                                                        ISD::MemIndexedMode &AM,
+                                                        SelectionDAG &DAG) const
+ {
+   EVT VT;
+   bool isSEXTLoad = false;
+
+   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+     VT = LD->getMemoryVT();
+     isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+     VT = ST->getMemoryVT();
+     if (ST->getValue().getValueType() == MVT::i64 && ST->isTruncatingStore()) {
+       return false;
+     }
+   } else {
+     return false;
+   }
+
+   bool isInc;
+   if (!getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, isInc, DAG))
+     return false;
+
+   // ShiftAmount = number of left-shifted bits in the Hexagon instruction,
+   // i.e. log2 of the access size in bytes (i8 -> 0, i16 -> 1, i32 -> 2,
+   // i64 -> 3). getIndexedAddressParts only succeeds for those four types.
+   int ShiftAmount = 0;
+   for (unsigned Bytes = VT.getSizeInBits() / 8; Bytes > 1; Bytes >>= 1)
+     ++ShiftAmount;
+
+   // Offset is known to be a ConstantSDNode at this point.
+   if (Is_PostInc_S4_Offset(Offset.getNode(), ShiftAmount)) {
+     AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
+     return true;
+   }
+
+   return false;
+ }
+
+ /// LowerINLINEASM - Scan the operand groups of an INLINEASM node and record
+ /// in the function info when an early-clobber register definition names the
+ /// return-address register. The node itself is returned unchanged.
+ SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op,
+                                               SelectionDAG &DAG) const {
+   SDNode *AsmNode = Op.getNode();
+   MachineFunction &MF = DAG.getMachineFunction();
+   HexagonMachineFunctionInfo *HMFI =
+     MF.getInfo<HexagonMachineFunctionInfo>();
+
+   if (AsmNode->getOpcode() != ISD::INLINEASM)
+     return Op;
+
+   unsigned EndIdx = AsmNode->getNumOperands();
+   if (AsmNode->getOperand(EndIdx - 1).getValueType() == MVT::Glue)
+     --EndIdx;  // Ignore the flag operand.
+
+   // Each operand group is a flag word followed by its values. Stop as soon
+   // as the LR clobber has been recorded.
+   unsigned Idx = InlineAsm::Op_FirstOperand;
+   while (Idx != EndIdx && !HMFI->hasClobberLR()) {
+     const unsigned OpFlags =
+       cast<ConstantSDNode>(AsmNode->getOperand(Idx))->getZExtValue();
+     unsigned RegCount = InlineAsm::getNumOperandRegisters(OpFlags);
+     ++Idx;  // Skip the ID value.
+
+     switch (InlineAsm::getKind(OpFlags)) {
+     default:
+       llvm_unreachable("Bad flags!");
+     case InlineAsm::Kind_RegDef:
+     case InlineAsm::Kind_RegUse:
+     case InlineAsm::Kind_Imm:
+     case InlineAsm::Kind_Clobber:
+     case InlineAsm::Kind_Mem:
+       // Nothing to inspect; step over the values of this group.
+       Idx += RegCount;
+       break;
+     case InlineAsm::Kind_RegDefEarlyClobber:
+       for (; RegCount != 0; --RegCount, ++Idx) {
+         const unsigned Reg =
+           cast<RegisterSDNode>(AsmNode->getOperand(Idx))->getReg();
+
+         // Check it to be lr
+         if (Reg == TM.getRegisterInfo()->getRARegister()) {
+           HMFI->setHasClobberLR(true);
+           break;
+         }
+       }
+       break;
+     }
+   }
+
+   return Op;
+ }
+
+
+//
+// Taken from the XCore backend.
+//
+ SDValue HexagonTargetLowering::
+ LowerBR_JT(SDValue Op, SelectionDAG &DAG) const
+ {
+   // Operands of BR_JT: chain, jump table, index.
+   SDValue InChain = Op.getOperand(0);
+   SDValue TableOp = Op.getOperand(1);
+   SDValue EntryIdx = Op.getOperand(2);
+   DebugLoc dl = Op.getDebugLoc();
+
+   JumpTableSDNode *JTNode = cast<JumpTableSDNode>(TableOp);
+   const unsigned TableNo = JTNode->getIndex();
+   MachineFunction &MF = DAG.getMachineFunction();
+   const MachineJumpTableInfo *JTInfo = MF.getJumpTableInfo();
+   SDValue TargetJT = DAG.getTargetJumpTable(JTNode->getIndex(), MVT::i32);
+
+   // Mark all jump table targets as address taken.
+   const std::vector<MachineBasicBlock*> &Targets =
+     JTInfo->getJumpTables()[TableNo].MBBs;
+   for (unsigned t = 0, NumTargets = Targets.size(); t != NumTargets; ++t) {
+     MachineBasicBlock *Dest = Targets[t];
+     Dest->setHasAddressTaken();
+     // This line is needed to set the hasAddressTaken flag on the BasicBlock
+     // object.
+     BlockAddress::get(const_cast<BasicBlock *>(Dest->getBasicBlock()));
+   }
+
+   // Address of the entry = table base + (index << 2).
+   SDValue TableBase = DAG.getNode(HexagonISD::WrapperJT, dl,
+                                   getPointerTy(), TargetJT);
+   SDValue Two = DAG.getConstant(2, MVT::i32);
+   SDValue ScaledIdx = DAG.getNode(ISD::SHL, dl, MVT::i32, EntryIdx, Two);
+   SDValue EntryAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, TableBase,
+                                   ScaledIdx);
+
+   // Load the branch target from the table and branch to it.
+   SDValue Dest = DAG.getLoad(MVT::i32, dl, InChain, EntryAddr,
+                              MachinePointerInfo(), false, false, false,
+                              0);
+   return DAG.getNode(HexagonISD::BR_JT, dl, MVT::Other, InChain, Dest);
+ }
+
+
+ SDValue
+ HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+                                                SelectionDAG &DAG) const {
+   DebugLoc dl = Op.getDebugLoc();
+   SDValue InChain = Op.getOperand(0);
+   SDValue AllocSize = Op.getOperand(1);
+
+   // Read the current stack pointer.
+   unsigned SPReg = getStackPointerRegisterToSaveRestore();
+   SDValue OldSP = DAG.getCopyFromReg(InChain, dl, SPReg, MVT::i32);
+
+   // New stack pointer = old stack pointer - requested size.
+   SDValue NewSP = DAG.getNode(ISD::SUB, dl, MVT::i32, OldSP, AllocSize);
+
+   //
+   // For Hexagon, the outgoing memory arguments area should be on top of the
+   // alloca area on the stack i.e., the outgoing memory arguments should be
+   // at a lower address than the alloca area. Move the alloca area down the
+   // stack by adding back the space reserved for outgoing arguments to SP
+   // here.
+   //
+   // The size of the outgoing args is not known at this point, so a pseudo
+   // instruction ADJDYNALLOC is emitted with a placeholder immediate of zero.
+   // It is patched with the correct, known offset in emitPrologue().
+   //
+   SDValue Placeholder = DAG.getConstant(0, MVT::i32);
+   SDValue AdjustedAddr = DAG.getNode(HexagonISD::ADJDYNALLOC, dl,
+                                      MVT::i32, NewSP, Placeholder);
+
+   // The subtraction result is the new stack start address, so write it back
+   // to the stack pointer register.
+   SDValue OutChain = DAG.getCopyToReg(InChain, dl,
+                                       TM.getRegisterInfo()->getStackRegister(),
+                                       NewSP);
+
+   // Results: the adjusted allocation address, then the chain.
+   SDValue Results[2] = { AdjustedAddr, OutChain };
+   return DAG.getMergeValues(Results, 2, dl);
+ }
+
+ SDValue
+ HexagonTargetLowering::LowerFormalArguments(SDValue Chain,
+                                             CallingConv::ID CallConv,
+                                             bool isVarArg,
+                                             const
+                                             SmallVectorImpl<ISD::InputArg> &Ins,
+                                             DebugLoc dl, SelectionDAG &DAG,
+                                             SmallVectorImpl<SDValue> &InVals)
+ const {
+
+   MachineFunction &MF = DAG.getMachineFunction();
+   MachineFrameInfo *MFI = MF.getFrameInfo();
+   MachineRegisterInfo &RegInfo = MF.getRegInfo();
+   HexagonMachineFunctionInfo *FuncInfo =
+     MF.getInfo<HexagonMachineFunctionInfo>();
+
+   // Let the calling convention assign a location to every incoming argument.
+   SmallVector<CCValAssign, 16> ArgLocs;
+   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+                  getTargetMachine(), ArgLocs, *DAG.getContext());
+   CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
+
+   // For LLVM, in the case when returning a struct by value (>8byte),
+   // the first argument is a pointer that points to the location on caller's
+   // stack where the return value will be stored. For Hexagon, the location on
+   // caller's stack is passed only when the struct size is smaller than (and
+   // equal to) 8 bytes. If not, no address will be passed into callee and
+   // callee return the result directly through R0/R1.
+   // NOTE(review): the size condition above reads inverted - confirm against
+   // the ABI.
+
+   SmallVector<SDValue, 4> MemOps;
+
+   for (unsigned ArgNo = 0, NumArgs = ArgLocs.size(); ArgNo != NumArgs;
+        ++ArgNo) {
+     CCValAssign &VA = ArgLocs[ArgNo];
+     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+     const bool InReg = VA.isRegLoc();
+     const bool ByVal = Flags.isByVal();
+
+     if (InReg && (!ByVal || Flags.getByValSize() > 8)) {
+       // Arguments passed in registers
+       // 1. int, long long, ptr args that get allocated in register.
+       // 2. Large struct that gets an register to put its address in.
+       EVT RegVT = VA.getLocVT();
+       const TargetRegisterClass *RC = 0;
+       if (RegVT == MVT::i8 || RegVT == MVT::i16 || RegVT == MVT::i32)
+         RC = Hexagon::IntRegsRegisterClass;
+       else if (RegVT == MVT::i64)
+         RC = Hexagon::DoubleRegsRegisterClass;
+
+       if (RC) {
+         // Bind the physical argument register to a fresh virtual register
+         // and read the value from it.
+         unsigned VReg = RegInfo.createVirtualRegister(RC);
+         RegInfo.addLiveIn(VA.getLocReg(), VReg);
+         InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+       } else {
+         assert (0);
+       }
+       continue;
+     }
+
+     if (InReg) {
+       // Only reachable for a byval argument of at most 8 bytes in a register.
+       assert (0 && "ByValSize must be bigger than 8 bytes");
+       continue;
+     }
+
+     // Sanity check.
+     assert(VA.isMemLoc());
+
+     // For a byval parameter use the "real" size, not the size of the
+     // pointer.
+     unsigned ObjSize = ByVal ? Flags.getByValSize()
+                              : VA.getLocVT().getStoreSizeInBits() >> 3;
+
+     // Create the frame index object for this incoming parameter...
+     unsigned StackLocation = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
+     int FI = MFI->CreateFixedObject(ObjSize, StackLocation, true);
+
+     // Create the SelectionDAG nodes corresponding to a load
+     // from this parameter.
+     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+
+     if (ByVal) {
+       // If it's a pass-by-value aggregate, then do not dereference the stack
+       // location. Instead, we should generate a reference to the stack
+       // location.
+       InVals.push_back(FIN);
+     } else {
+       InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+                                    MachinePointerInfo(), false, false,
+                                    false, 0));
+     }
+   }
+
+   if (!MemOps.empty())
+     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOps[0],
+                         MemOps.size());
+
+   if (isVarArg) {
+     // This will point to the next argument passed via stack.
+     int FrameIndex = MFI->CreateFixedObject(Hexagon_PointerSize,
+                                             HEXAGON_LRFP_SIZE +
+                                             CCInfo.getNextStackOffset(),
+                                             true);
+     FuncInfo->setVarArgsFrameIndex(FrameIndex);
+   }
+
+   return Chain;
+ }
+
+ SDValue
+ HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+   // VASTART stores the address of the VarArgsFrameIndex slot into the
+   // memory location argument.
+   HexagonMachineFunctionInfo *HMFI =
+     DAG.getMachineFunction().getInfo<HexagonMachineFunctionInfo>();
+   SDValue VarArgsSlot = DAG.getFrameIndex(HMFI->getVarArgsFrameIndex(),
+                                           MVT::i32);
+
+   // Operands: chain, destination pointer, source value of the destination.
+   const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+   SDValue InChain = Op.getOperand(0);
+   SDValue DestPtr = Op.getOperand(1);
+   return DAG.getStore(InChain, Op.getDebugLoc(), VarArgsSlot, DestPtr,
+                       MachinePointerInfo(DestSV), false, false, 0);
+ }
+
+ SDValue
+ HexagonTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+   // SELECT_CC operands: lhs, rhs, true value, false value, condition code.
+   // Rebuild it as an i1 SETCC feeding a plain SELECT.
+   DebugLoc dl = Op.getDebugLoc();
+   EVT ResultVT = Op.getNode()->getValueType(0);
+
+   SDValue Pred = DAG.getNode(ISD::SETCC, dl, MVT::i1,
+                              Op.getOperand(2), Op.getOperand(3),
+                              Op.getOperand(4));
+   return DAG.getNode(ISD::SELECT, dl, ResultVT,
+                      Pred, Op.getOperand(0), Op.getOperand(1));
+ }
+
+ SDValue
+ HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
+   MachineFunction &MF = DAG.getMachineFunction();
+   MF.getFrameInfo()->setReturnAddressIsTaken(true);
+
+   DebugLoc dl = Op.getDebugLoc();
+   EVT VT = Op.getValueType();
+   const unsigned Depth =
+     cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+
+   if (Depth == 0) {
+     // Return LR, which contains the return address. Mark it an implicit
+     // live-in.
+     const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+     unsigned Reg = MF.addLiveIn(TRI->getRARegister(),
+                                 getRegClassFor(MVT::i32));
+     return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
+   }
+
+   // For an outer frame, load the word stored 4 bytes past the frame address
+   // computed by LowerFRAMEADDR.
+   SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+   SDValue Offset = DAG.getConstant(4, MVT::i32);
+   return DAG.getLoad(VT, dl, DAG.getEntryNode(),
+                      DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
+                      MachinePointerInfo(), false, false, false, 0);
+ }
+
+ SDValue
+ HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
+   const HexagonRegisterInfo *TRI = TM.getRegisterInfo();
+   DAG.getMachineFunction().getFrameInfo()->setFrameAddressIsTaken(true);
+
+   DebugLoc dl = Op.getDebugLoc();
+   EVT VT = Op.getValueType();
+   const unsigned Depth =
+     cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+
+   // Start from the frame register of the current function ...
+   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+                                          TRI->getFrameRegister(), VT);
+
+   // ... and follow one stored frame address per requested level.
+   for (unsigned Level = 0; Level != Depth; ++Level)
+     FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+                             MachinePointerInfo(),
+                             false, false, false, 0);
+   return FrameAddr;
+ }
+
+
+ // Lower MEMBARRIER to a HexagonISD::BARRIER node that carries only the
+ // incoming chain.
+ SDValue HexagonTargetLowering::LowerMEMBARRIER(SDValue Op,
+                                                SelectionDAG& DAG) const {
+   SDValue InChain = Op.getOperand(0);
+   return DAG.getNode(HexagonISD::BARRIER, Op.getDebugLoc(), MVT::Other,
+                      InChain);
+ }
+
+
+ // Lower ATOMIC_FENCE to a HexagonISD::BARRIER node that carries only the
+ // incoming chain.
+ SDValue HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op,
+                                                  SelectionDAG& DAG) const {
+   SDValue InChain = Op.getOperand(0);
+   return DAG.getNode(HexagonISD::BARRIER, Op.getDebugLoc(), MVT::Other,
+                      InChain);
+ }
+
+
+ // Lower a GlobalAddress node to a target global address wrapped in
+ // CONST32_GP when the object file lowering places the global in the small
+ // section, and in CONST32 otherwise.
+ SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op,
+                                                   SelectionDAG &DAG) const {
+   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+   const GlobalValue *GV = GA->getGlobal();
+   int64_t Offset = GA->getOffset();
+   DebugLoc dl = Op.getDebugLoc();
+   SDValue TargetAddr =
+     DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
+
+   HexagonTargetObjectFile &TLOF =
+     (HexagonTargetObjectFile&)getObjFileLowering();
+   const bool InSmallSection =
+     TLOF.IsGlobalInSmallSection(GV, getTargetMachine());
+
+   const unsigned WrapperOpc =
+     InSmallSection ? HexagonISD::CONST32_GP : HexagonISD::CONST32;
+   return DAG.getNode(WrapperOpc, dl, getPointerTy(), TargetAddr);
+ }
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
+ // Construct the Hexagon lowering object: register the i32/i64/i1 register
+ // classes, bind runtime-library names for the operations expanded to
+ // libcalls, and declare the legalization action of each operation.
+ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
+                                              &targetmachine)
+   : TargetLowering(targetmachine, new HexagonTargetObjectFile()),
+     TM(targetmachine) {
+
+   // Set up the register classes.
+   addRegisterClass(MVT::i32, Hexagon::IntRegsRegisterClass);
+   addRegisterClass(MVT::i64, Hexagon::DoubleRegsRegisterClass);
+
+   addRegisterClass(MVT::i1, Hexagon::PredRegsRegisterClass);
+
+   computeRegisterProperties();
+
+   // Align loop entry
+   setPrefLoopAlignment(4);
+
+   // Limits for inline expansion of memcpy/memmove
+   maxStoresPerMemcpy = 6;
+   maxStoresPerMemmove = 6;
+
+   //
+   // Library calls for unsupported operations
+   //
+   setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
+
+   setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
+   setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
+   setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
+   setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
+   setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
+   setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
+   setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
+
+   setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi");
+   setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi");
+   setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
+
+   setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi");
+   setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi");
+   setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
+
+   setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
+   setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
+   setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
+   setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
+   setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
+
+   setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
+   setOperationAction(ISD::SDIV, MVT::i32, Expand);
+   // Signed remainder uses the signed helper. (This was first bound to the
+   // unsigned "__hexagon_umodsi3" and only corrected by a later duplicate
+   // registration.)
+   setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
+   setOperationAction(ISD::SREM, MVT::i32, Expand);
+
+   setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
+   setOperationAction(ISD::SDIV, MVT::i64, Expand);
+   setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
+   setOperationAction(ISD::SREM, MVT::i64, Expand);
+
+   setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
+   setOperationAction(ISD::UDIV, MVT::i32, Expand);
+
+   setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
+   setOperationAction(ISD::UDIV, MVT::i64, Expand);
+
+   setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
+   setOperationAction(ISD::UREM, MVT::i32, Expand);
+
+   setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");
+   setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+   setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
+   setOperationAction(ISD::FDIV, MVT::f32, Expand);
+
+   setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
+   setOperationAction(ISD::FDIV, MVT::f64, Expand);
+
+   setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2");
+   setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand);
+
+   setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
+   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+
+   setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
+   setOperationAction(ISD::FADD, MVT::f64, Expand);
+
+   setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
+   setOperationAction(ISD::FADD, MVT::f32, Expand);
+
+   setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2");
+   setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
+
+   setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
+   setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
+
+   setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
+   setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
+
+   // SINT_TO_FP on i32 is already marked Expand above.
+   setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
+
+   setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2");
+   setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
+
+   setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2");
+   setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
+
+   setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
+   setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
+
+   setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
+   setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
+
+   setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
+   setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
+
+   setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
+   setCondCodeAction(ISD::SETOLT, MVT::f64, Expand);
+
+   setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
+   setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
+
+   setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
+   setOperationAction(ISD::FMUL, MVT::f64, Expand);
+
+   // Floating-point multiply is ISD::FMUL; ISD::MUL is the integer opcode.
+   setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
+   setOperationAction(ISD::FMUL, MVT::f32, Expand);
+
+   setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
+   setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
+
+   setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
+
+
+   // Floating-point subtract is ISD::FSUB; ISD::SUB is the integer opcode.
+   setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
+   setOperationAction(ISD::FSUB, MVT::f64, Expand);
+
+   setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
+   setOperationAction(ISD::FSUB, MVT::f32, Expand);
+
+   setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2");
+   setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
+
+   setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2");
+   setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
+
+   setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2");
+   setCondCodeAction(ISD::SETO, MVT::f64, Expand);
+
+   setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2");
+   setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
+
+   setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2");
+   setCondCodeAction(ISD::SETO, MVT::f32, Expand);
+
+   setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2");
+   setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
+
+   // Post-increment loads and stores are legal for the scalar integer types.
+   setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
+   setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
+   setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
+   setIndexedLoadAction(ISD::POST_INC, MVT::i64, Legal);
+
+   setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal);
+   setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal);
+   setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
+   setIndexedStoreAction(ISD::POST_INC, MVT::i64, Legal);
+
+   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+
+   // Turn FP extload into load/fextend.
+   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+   // Expand i1 sign-extending loads.
+   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand);
+   // Turn FP truncstore into trunc + store.
+   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+   // Custom legalize GlobalAddress nodes into CONST32.
+   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+   setOperationAction(ISD::GlobalAddress, MVT::i8, Custom);
+   // Truncate action?
+   setOperationAction(ISD::TRUNCATE, MVT::i64, Expand);
+
+   // Hexagon doesn't have sext_inreg, replace them with shl/sra.
+   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+
+   // Hexagon has no REM or DIVREM operations. The REM opcodes are already
+   // marked Expand next to their libcall names above.
+   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+   setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+   setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+
+   setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+
+   // Expand fp<->uint.
+   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+
+   // Hexagon has no select or setcc: expand to SELECT_CC.
+   setOperationAction(ISD::SELECT, MVT::f32, Expand);
+   setOperationAction(ISD::SELECT, MVT::f64, Expand);
+
+   // Lower SELECT_CC to SETCC and SELECT.
+   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+   setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+   // This is a workaround documented in DAGCombiner.cpp:2892 We don't
+   // support SELECT_CC on every type.
+   setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+   setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+   setOperationAction(ISD::BRIND, MVT::Other, Expand);
+   if (EmitJumpTables) {
+     setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+   } else {
+     setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+   }
+
+   setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+
+   setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+   setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+   setOperationAction(ISD::FSIN , MVT::f64, Expand);
+   setOperationAction(ISD::FCOS , MVT::f64, Expand);
+   setOperationAction(ISD::FREM , MVT::f64, Expand);
+   setOperationAction(ISD::FSIN , MVT::f32, Expand);
+   setOperationAction(ISD::FCOS , MVT::f32, Expand);
+   setOperationAction(ISD::FREM , MVT::f32, Expand);
+   setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+   setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+   setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+   setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+   setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+   setOperationAction(ISD::ROTL , MVT::i32, Expand);
+   setOperationAction(ISD::ROTR , MVT::i32, Expand);
+   setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+   setOperationAction(ISD::FPOW , MVT::f64, Expand);
+   setOperationAction(ISD::FPOW , MVT::f32, Expand);
+
+   setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+   setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+   setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+
+   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+
+   setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+   setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+   setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+   setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+
+   setOperationAction(ISD::EH_RETURN, MVT::Other, Expand);
+
+   // The exception pointer/selector registers depend on the subtarget.
+   if (TM.getSubtargetImpl()->isSubtargetV2()) {
+     setExceptionPointerRegister(Hexagon::R20);
+     setExceptionSelectorRegister(Hexagon::R21);
+   } else {
+     setExceptionPointerRegister(Hexagon::R0);
+     setExceptionSelectorRegister(Hexagon::R1);
+   }
+
+   // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
+   setOperationAction(ISD::VASTART , MVT::Other, Custom);
+
+   // Use the default implementation.
+   setOperationAction(ISD::VAARG , MVT::Other, Expand);
+   setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+   setOperationAction(ISD::VAEND , MVT::Other, Expand);
+   setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+   setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
+
+
+   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
+   setOperationAction(ISD::INLINEASM , MVT::Other, Custom);
+
+   setMinFunctionAlignment(2);
+
+   // Needed for DYNAMIC_STACKALLOC expansion.
+   unsigned StackRegister = TM.getRegisterInfo()->getStackRegister();
+   setStackPointerRegisterToSaveRestore(StackRegister);
+ }
+
+
+ // Return the printable name of a HexagonISD opcode, or 0 when Opcode is not
+ // one of the target-specific nodes listed here.
+ const char*
+ HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
+   switch (Opcode) {
+     default: return 0;
+     case HexagonISD::CONST32: return "HexagonISD::CONST32";
+     case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP";
+     case HexagonISD::SETCC: return "HexagonISD::SETCC";
+     case HexagonISD::ADJDYNALLOC: return "HexagonISD::ADJDYNALLOC";
+     case HexagonISD::ARGEXTEND: return "HexagonISD::ARGEXTEND";
+     case HexagonISD::CMPICC: return "HexagonISD::CMPICC";
+     case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC";
+     case HexagonISD::BRICC: return "HexagonISD::BRICC";
+     case HexagonISD::BRFCC: return "HexagonISD::BRFCC";
+     case HexagonISD::SELECT_ICC: return "HexagonISD::SELECT_ICC";
+     case HexagonISD::SELECT_FCC: return "HexagonISD::SELECT_FCC";
+     case HexagonISD::Hi: return "HexagonISD::Hi";
+     case HexagonISD::Lo: return "HexagonISD::Lo";
+     case HexagonISD::FTOI: return "HexagonISD::FTOI";
+     case HexagonISD::ITOF: return "HexagonISD::ITOF";
+     case HexagonISD::CALL: return "HexagonISD::CALL";
+     case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
+     case HexagonISD::BR_JT: return "HexagonISD::BR_JT";
+     case HexagonISD::BARRIER: return "HexagonISD::BARRIER";
+     case HexagonISD::WrapperJT: return "HexagonISD::WrapperJT";
+     case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
+   }
+ }
+
+ // Truncation is reported as free only for i64 -> i32.
+ bool
+ HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
+   EVT SrcVT = EVT::getEVT(Ty1);
+   EVT DstVT = EVT::getEVT(Ty2);
+   const bool BothSimple = SrcVT.isSimple() && DstVT.isSimple();
+   if (!BothSimple)
+     return false;
+   return SrcVT.getSimpleVT() == MVT::i64 && DstVT.getSimpleVT() == MVT::i32;
+ }
+
+ // Truncation is reported as free only for i64 -> i32.
+ bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
+   const bool BothSimple = VT1.isSimple() && VT2.isSimple();
+   if (!BothSimple)
+     return false;
+   return VT1.getSimpleVT() == MVT::i64 && VT2.getSimpleVT() == MVT::i32;
+ }
+
+ // Dispatch a node marked Custom to its lowering routine. Opcodes without a
+ // routine are a programming error; llvm_unreachable is used instead of
+ // assert(0) so that builds without assertions cannot fall through into an
+ // unrelated case.
+ SDValue
+ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+   switch (Op.getOpcode()) {
+     default:
+       llvm_unreachable("Should not custom lower this!");
+     // Frame & Return address.
+     case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+     case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+     case ISD::GlobalTLSAddress:
+       llvm_unreachable("TLS not implemented for Hexagon.");
+     case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG);
+     case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
+     case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
+     case ISD::VASTART: return LowerVASTART(Op, DAG);
+     case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+
+     case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+     case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+     case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+     case ISD::INLINEASM: return LowerINLINEASM(Op, DAG);
+
+   }
+ }
+
+
+
+//===----------------------------------------------------------------------===//
+// Hexagon Scheduler Hooks
+//===----------------------------------------------------------------------===//
+ MachineBasicBlock *
+ HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+                                                    MachineBasicBlock *BB)
+ const {
+   // ADJDYNALLOC is the only instruction handled here: it is recorded in the
+   // function info and the block is returned unchanged.
+   if (MI->getOpcode() == Hexagon::ADJDYNALLOC) {
+     HexagonMachineFunctionInfo *HMFI =
+       BB->getParent()->getInfo<HexagonMachineFunctionInfo>();
+     HMFI->addAllocaAdjustInst(MI);
+     return BB;
+   }
+
+   assert(false && "Unexpected instr type to insert");
+   return NULL;
+ }
+
+//===----------------------------------------------------------------------===//
+// Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+ // Map the single-letter constraint 'r' to the integer register class
+ // (i8/i16/i32) or the double register class (i64). Multi-character
+ // constraints are forwarded to the generic implementation.
+ std::pair<unsigned, const TargetRegisterClass*>
+ HexagonTargetLowering::getRegForInlineAsmConstraint(const
+                                                     std::string &Constraint,
+                                                     EVT VT) const {
+   if (Constraint.size() == 1) {
+     if (Constraint[0] == 'r') {   // R0-R31
+       switch (VT.getSimpleVT().SimpleTy) {
+       case MVT::i64:
+         return std::make_pair(0U, Hexagon::DoubleRegsRegisterClass);
+       default:
+         assert(0 && "getRegForInlineAsmConstraint Unhandled data type");
+         // Without assertions, unhandled types share the integer class.
+       case MVT::i32:
+       case MVT::i16:
+       case MVT::i8:
+         return std::make_pair(0U, Hexagon::IntRegsRegisterClass);
+       }
+     } else {
+       assert(0 && "Unknown asm register class");
+     }
+   }
+
+   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ }
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented by
+ /// AM is legal for this target, for a load/store of the specified type.
+ /// Accepted modes have no global base, no scaled register, and a base
+ /// offset strictly between -(1 << 13) and (1 << 13) - 1. Ty is not consulted.
+ bool HexagonTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+                                                   Type *Ty) const {
+   // Immediate offset range check.
+   const bool OffsetTooSmall = AM.BaseOffs <= -(1LL << 13);
+   const bool OffsetTooLarge = AM.BaseOffs >= (1LL << 13)-1;
+   if (OffsetTooSmall || OffsetTooLarge)
+     return false;
+
+   // No global is ever allowed as a base.
+   if (AM.BaseGV)
+     return false;
+
+   // Only "r+i", "r", or just "i" are allowed: there is no scaled
+   // addressing mode, so the scale must be zero.
+   const int Scale = AM.Scale;
+   return Scale == 0;
+ }
+
+ /// isLegalICmpImmediate - Return true if the specified immediate is a legal
+ /// icmp immediate, i.e. one an icmp instruction can compare a register
+ /// against without first materializing it into a register. The accepted
+ /// range is [-512, 511].
+ bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+   const int64_t MinImm = -512;
+   const int64_t MaxImm = 511;
+   return !(Imm < MinImm || Imm > MaxImm);
+ }
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets which want to do tail call
+ /// optimization should implement this function.
+ ///
+ /// Only the obviously safe cases that require no ABI changes are accepted:
+ /// a direct call with a matching calling convention, no varargs, and no
+ /// struct-return semantics on either side.
+ bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
+                                  SDValue Callee,
+                                  CallingConv::ID CalleeCC,
+                                  bool isVarArg,
+                                  bool isCalleeStructRet,
+                                  bool isCallerStructRet,
+                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                  const SmallVectorImpl<SDValue> &OutVals,
+                                  const SmallVectorImpl<ISD::InputArg> &Ins,
+                                  SelectionDAG& DAG) const {
+   // If this is a tail call via a function pointer, then don't do it!
+   const bool IsDirectCall = isa<GlobalAddressSDNode>(Callee) ||
+                             isa<ExternalSymbolSDNode>(Callee);
+   if (!IsDirectCall)
+     return false;
+
+   // Do not optimize if the calling conventions do not match.
+   const Function *Caller = DAG.getMachineFunction().getFunction();
+   if (Caller->getCallingConv() != CalleeCC)
+     return false;
+
+   // Do not tail call optimize vararg calls, and avoid the optimization if
+   // either caller or callee uses struct return semantics.
+   if (isVarArg || isCalleeStructRet || isCallerStructRet)
+     return false;
+
+   // In addition to the cases above, Tail Call Optimization is also disabled
+   // when the calling convention code determines that at least one outgoing
+   // argument needs to go on the stack. That cannot be checked here because
+   // the information is not available at this point.
+   return true;
+ }
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
new file mode 100644
index 0000000..b327615
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -0,0 +1,162 @@
+//==-- HexagonISelLowering.h - Hexagon DAG Lowering Interface ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Hexagon uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef Hexagon_ISELLOWERING_H
+#define Hexagon_ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "Hexagon.h"
+
+namespace llvm {
+ namespace HexagonISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ CONST32,
+ CONST32_GP, // For marking data present in GP.
+ SETCC,
+ ADJDYNALLOC,
+ ARGEXTEND,
+
+ CMPICC, // Compare two GPR operands, set icc.
+ CMPFCC, // Compare two FP operands, set fcc.
+ BRICC, // Branch to dest on icc condition
+ BRFCC, // Branch to dest on fcc condition
+ SELECT_ICC, // Select between two values using the current ICC flags.
+ SELECT_FCC, // Select between two values using the current FCC flags.
+
+ Hi, Lo, // Hi/Lo operations, typically on a global address.
+
+ FTOI, // FP to Int within a FP register.
+ ITOF, // Int to FP within a FP register.
+
+ CALL, // A call instruction.
+ RET_FLAG, // Return with a flag operand.
+ BR_JT, // Jump table.
+ BARRIER, // Memory barrier.
+ WrapperJT,
+ TC_RETURN
+ };
+ }
+
+ class HexagonTargetLowering : public TargetLowering {
+ int VarArgsFrameOffset; // Frame offset to start of varargs area.
+
+ bool CanReturnSmallStruct(const Function* CalleeFn,
+ unsigned& RetSize) const;
+
+ public:
+ HexagonTargetMachine &TM;
+ explicit HexagonTargetLowering(HexagonTargetMachine &targetmachine);
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets which want to do tail call
+ /// optimization should implement this function.
+ bool
+ IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const
+ SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+
+ virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
+ virtual bool isTruncateFree(EVT VT1, EVT VT2) const;
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDValue Callee) const;
+
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const;
+ SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual MachineBasicBlock
+ *EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ virtual EVT getSetCCResultType(EVT VT) const {
+ return MVT::i1;
+ }
+
+ virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base, SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ // Intrinsics
+ virtual SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const;
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ /// The type may be VoidTy, in which case only return true if the addressing
+ /// mode is legal for a load/store of any legal type.
+ /// TODO: Handle pre/postinc as well.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
+
+ /// isLegalICmpImmediate - Return true if the specified immediate is legal
+ /// icmp immediate, that is the target has icmp instructions which can
+ /// compare a register against the immediate without having to materialize
+ /// the immediate into a register.
+ virtual bool isLegalICmpImmediate(int64_t Imm) const;
+ };
+} // end namespace llvm
+
+#endif // Hexagon_ISELLOWERING_H
diff --git a/lib/Target/Hexagon/HexagonImmediates.td b/lib/Target/Hexagon/HexagonImmediates.td
new file mode 100644
index 0000000..1e3fcb8
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonImmediates.td
@@ -0,0 +1,491 @@
+//=- HexagonImmediates.td - Hexagon immediate processing --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// From IA64's InstrInfo file
+def s32Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s16Imm : Operand<i32> {
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s12Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s11Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s11_0Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s11_1Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s11_2Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s11_3Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s10Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s8Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s9Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s8Imm64 : Operand<i64> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s6Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s4Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s4_0Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s4_1Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s4_2Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s4_3Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u64Imm : Operand<i64> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u32Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u16Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u16_0Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u16_1Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u16_2Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u11_3Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u10Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u9Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u8Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u7Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u6Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u6_0Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u6_1Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u6_2Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u6_3Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u5Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u4Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u3Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u2Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def n8Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def m6Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+//
+// Immediate predicates
+//
+def s32ImmPred : PatLeaf<(i32 imm), [{
+ // s32ImmPred predicate - True if the immediate fits in a 32-bit sign
+ // extended field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<32>(v);
+}]>;
+
+def s32_24ImmPred : PatLeaf<(i32 imm), [{
+ // s32_24ImmPred predicate - True if the immediate fits in a 32-bit sign
+ // extended field that is a multiple of 0x1000000.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<32,24>(v);
+}]>;
+
+def s32_16s8ImmPred : PatLeaf<(i32 imm), [{
+ // s32_16s8ImmPred predicate - True if the immediate fits in a 32-bit sign
+ // extended field that is a multiple of 0x10000.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<24,16>(v);
+}]>;
+
+def s16ImmPred : PatLeaf<(i32 imm), [{
+ // immS16 predicate - True if the immediate fits in a 16-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<16>(v);
+}]>;
+
+
+def s13ImmPred : PatLeaf<(i32 imm), [{
+ // immS13 predicate - True if the immediate fits in a 13-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<13>(v);
+}]>;
+
+
+def s12ImmPred : PatLeaf<(i32 imm), [{
+ // s12ImmPred predicate - True if the immediate fits in a 12-bit sign
+ // extended field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<12>(v);
+}]>;
+
+def s11_0ImmPred : PatLeaf<(i32 imm), [{
+ // s11_0ImmPred predicate - True if the immediate fits in a 11-bit sign
+ // extended field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<11>(v);
+}]>;
+
+
+def s11_1ImmPred : PatLeaf<(i32 imm), [{
+ // s11_1ImmPred predicate - True if the immediate fits in a 11-bit sign
+ // extended field that is a multiple of 2.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,1>(v);
+}]>;
+
+
+def s11_2ImmPred : PatLeaf<(i32 imm), [{
+ // s11_2ImmPred predicate - True if the immediate fits in a 11-bit sign
+ // extended field that is a multiple of 4.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,2>(v);
+}]>;
+
+
+def s11_3ImmPred : PatLeaf<(i32 imm), [{
+ // s11_3ImmPred predicate - True if the immediate fits in a 11-bit sign
+ // extended field that is a multiple of 8.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,3>(v);
+}]>;
+
+
+def s10ImmPred : PatLeaf<(i32 imm), [{
+ // s10ImmPred predicate - True if the immediate fits in a 10-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<10>(v);
+}]>;
+
+
+def s9ImmPred : PatLeaf<(i32 imm), [{
+ // s9ImmPred predicate - True if the immediate fits in a 9-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<9>(v);
+}]>;
+
+
+def s8ImmPred : PatLeaf<(i32 imm), [{
+ // s8ImmPred predicate - True if the immediate fits in a 8-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<8>(v);
+}]>;
+
+
+def s8Imm64Pred : PatLeaf<(i64 imm), [{
+ // s8Imm64Pred predicate - True if the i64 immediate fits in a 8-bit sign
+ // extended field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<8>(v);
+}]>;
+
+
+def s6ImmPred : PatLeaf<(i32 imm), [{
+ // s6ImmPred predicate - True if the immediate fits in a 6-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<6>(v);
+}]>;
+
+
+def s4_0ImmPred : PatLeaf<(i32 imm), [{
+ // s4_0ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<4>(v);
+}]>;
+
+
+def s4_1ImmPred : PatLeaf<(i32 imm), [{
+ // s4_1ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field that is a multiple of 2.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<4,1>(v);
+}]>;
+
+
+def s4_2ImmPred : PatLeaf<(i32 imm), [{
+ // s4_2ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field that is a multiple of 4.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<4,2>(v);
+}]>;
+
+
+def s4_3ImmPred : PatLeaf<(i32 imm), [{
+ // s4_3ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field that is a multiple of 8.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<4,3>(v);
+}]>;
+
+
+def u64ImmPred : PatLeaf<(i64 imm), [{
+ // u64ImmPred predicate - Always true; no range check is applied to the
+ // immediate.
+ // Adding "N ||" to suppress gcc unused warning.
+ return (N || true);
+}]>;
+
+def u32ImmPred : PatLeaf<(i32 imm), [{
+ // u32ImmPred predicate - True if the immediate fits in a 32-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<32>(v);
+}]>;
+
+def u16ImmPred : PatLeaf<(i32 imm), [{
+ // u16ImmPred predicate - True if the immediate fits in a 16-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<16>(v);
+}]>;
+
+def u16_s8ImmPred : PatLeaf<(i32 imm), [{
+ // u16_s8ImmPred predicate - True if the immediate fits in a 16-bit unsigned
+ // field that is shifted left by 8 bits - multiple of 256.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<16,8>(v);
+}]>;
+
+def u9ImmPred : PatLeaf<(i32 imm), [{
+ // u9ImmPred predicate - True if the immediate fits in a 9-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<9>(v);
+}]>;
+
+
+def u8ImmPred : PatLeaf<(i32 imm), [{
+ // u8ImmPred predicate - True if the immediate fits in a 8-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<8>(v);
+}]>;
+
+def u7ImmPred : PatLeaf<(i32 imm), [{
+ // u7ImmPred predicate - True if the immediate fits in a 7-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<7>(v);
+}]>;
+
+
+def u6ImmPred : PatLeaf<(i32 imm), [{
+ // u6ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<6>(v);
+}]>;
+
+def u6_0ImmPred : PatLeaf<(i32 imm), [{
+ // u6_0ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // field. Same as u6ImmPred.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<6>(v);
+}]>;
+
+def u6_1ImmPred : PatLeaf<(i32 imm), [{
+ // u6_1ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // field that is 1 bit aligned - multiple of 2.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,1>(v);
+}]>;
+
+def u6_2ImmPred : PatLeaf<(i32 imm), [{
+ // u6_2ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // field that is 2 bits aligned - multiple of 4.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,2>(v);
+}]>;
+
+def u6_3ImmPred : PatLeaf<(i32 imm), [{
+ // u6_3ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // field that is 3 bits aligned - multiple of 8.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,3>(v);
+}]>;
+
+def u5ImmPred : PatLeaf<(i32 imm), [{
+ // u5ImmPred predicate - True if the immediate fits in a 5-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<5>(v);
+}]>;
+
+
+def u3ImmPred : PatLeaf<(i32 imm), [{
+ // u3ImmPred predicate - True if the immediate fits in a 3-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<3>(v);
+}]>;
+
+
+def u2ImmPred : PatLeaf<(i32 imm), [{
+ // u2ImmPred predicate - True if the immediate fits in a 2-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<2>(v);
+}]>;
+
+
+def u1ImmPred : PatLeaf<(i1 imm), [{
+ // u1ImmPred predicate - True if the immediate fits in a 1-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<1>(v);
+}]>;
+
+def m6ImmPred : PatLeaf<(i32 imm), [{
+ // m6ImmPred predicate - True if the immediate fits in a 6-bit sign extended
+ // field. NOTE(review): same check as s6ImmPred; negativity is not tested.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<6>(v);
+}]>;
+
+//InN means negative integers in [-(2^N - 1), 0]
+def n8ImmPred : PatLeaf<(i32 imm), [{
+ // n8ImmPred predicate - True if the immediate lies in the inclusive range
+ // [-255, 0].
+ int64_t v = (int64_t)N->getSExtValue();
+ return (-255 <= v && v <= 0);
+}]>;
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
new file mode 100644
index 0000000..7e92776
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -0,0 +1,242 @@
+//==- HexagonInstrFormats.td - Hexagon Instruction Formats --*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr,
+ InstrItinClass itin> : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "Hexagon";
+
+/* Commented out for Hexagon
+ bits<2> op;
+ let Inst{31-30} = op; */ // Top two bits are the 'op' field
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Constraints = cstr;
+ let Itinerary = itin;
+}
+
+//----------------------------------------------------------------------------//
+// Instruction Classes Definitions +
+//----------------------------------------------------------------------------//
+
+// LD Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", LD> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<13> imm13;
+}
+
+// LD Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, LD> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<13> imm13;
+}
+
+// ST Instruction Class in V2/V3 can take SLOT0 only.
+// ST Instruction Class in V4 can take SLOT0 & SLOT1.
+// Definition of the instruction class CHANGED from V2/V3 to V4.
+class STInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", ST> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<13> imm13;
+}
+
+// ST Instruction Class in V2/V3 can take SLOT0 only.
+// ST Instruction Class in V4 can take SLOT0 & SLOT1.
+// Definition of the instruction class CHANGED from V2/V3 to V4.
+class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ST> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<13> imm13;
+}
+
+// ALU32 Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class ALU32Type<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", ALU32> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> imm16;
+ bits<16> imm16_2;
+}
+
+// ALU64 Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4.
+class ALU64Type<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", ALU64> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> imm16;
+ bits<16> imm16_2;
+}
+
+// M Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
+class MInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", M> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+}
+
+// M Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
+class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, M> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+}
+
+// S Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
+class SInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+//: InstHexagon<outs, ins, asmstr, pattern, cstr, !if(V4T, XTYPE_V4, M)> {
+ : InstHexagon<outs, ins, asmstr, pattern, "", S> {
+// : InstHexagon<outs, ins, asmstr, pattern, "", S> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+}
+
+// S Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
+class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, S> {
+// : InstHexagon<outs, ins, asmstr, pattern, cstr, S> {
+// : InstHexagon<outs, ins, asmstr, pattern, cstr, !if(V4T, XTYPE_V4, S)> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+}
+
+// J Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class JType<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", J> {
+ bits<16> imm16;
+}
+
+// JR Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class JRType<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", JR> {
+ bits<5> rs;
+ bits<5> pu; // Predicate register
+}
+
+// CR Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", CR> {
+ bits<5> rs;
+ bits<10> imm10;
+}
+
+
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", PSEUDO>;
+
+
+//----------------------------------------------------------------------------//
+// Instruction Classes Definitions -
+//----------------------------------------------------------------------------//
+
+
+//
+// ALU32 patterns
+//
+class ALU32_rr<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU32Type<outs, ins, asmstr, pattern> {
+}
+
+class ALU32_ir<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU32Type<outs, ins, asmstr, pattern> {
+ let rt{0-4} = 0;
+}
+
+class ALU32_ri<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU32Type<outs, ins, asmstr, pattern> {
+ let rt{0-4} = 0;
+}
+
+class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU32Type<outs, ins, asmstr, pattern> {
+ let rt{0-4} = 0;
+}
+
+//
+// ALU64 patterns.
+//
+class ALU64_rr<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU64Type<outs, ins, asmstr, pattern> {
+}
+
+// J Type Instructions.
+class JInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : JType<outs, ins, asmstr, pattern> {
+}
+
+// JR type Instructions.
+class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : JRType<outs, ins, asmstr, pattern> {
+}
+
+
+// Post increment ST Instruction.
+class STInstPI<dag outs, dag ins, string asmstr, list<dag> pattern, string cstr>
+ : STInstPost<outs, ins, asmstr, pattern, cstr> {
+ let rt{0-4} = 0;
+}
+
+// Post increment LD Instruction.
+class LDInstPI<dag outs, dag ins, string asmstr, list<dag> pattern, string cstr>
+ : LDInstPost<outs, ins, asmstr, pattern, cstr> {
+ let rt{0-4} = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// V4 Instruction Format Definitions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrFormatsV4.td"
+
+//===----------------------------------------------------------------------===//
+// V4 Instruction Format Definitions -
+//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
new file mode 100644
index 0000000..bd5e449
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -0,0 +1,46 @@
+//=- HexagonInstrFormatsV4.td - Hexagon Instruction Formats -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V4 instruction classes in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// NV type instructions.
+//
+class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", NV_V4> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<13> imm13;
+}
+
+// Definition of Post increment new value store.
+class NVInstPost_V4<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, NV_V4> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<13> imm13;
+}
+
+// Post increment new value store with the rt field fixed to 0.
+class NVInstPI_V4<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : NVInstPost_V4<outs, ins, asmstr, pattern, cstr> {
+ let rt{0-4} = 0;
+}
+
+class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", MEM_V4> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<6> imm6;
+}
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
new file mode 100644
index 0000000..69a50d7
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -0,0 +1,1459 @@
+//=- HexagonInstrInfo.cpp - Hexagon Instruction Information -------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonRegisterInfo.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonSubtarget.h"
+#include "Hexagon.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#define GET_INSTRINFO_CTOR
+#include "HexagonGenInstrInfo.inc"
+
+#include <iostream>
+
+
+using namespace llvm;
+
+///
+/// Constants for Hexagon instructions.
+///
+// Each pair gives an upper (*_MAX) and a lower (*_MIN) limit for one
+// addressing form: MEMW/MEMD/MEMH/MEMB offsets, the ADDI immediate, and the
+// MEMx auto-increment amounts.
+// NOTE(review): every *_MIN value below is a positive number larger than its
+// *_MAX counterpart. Presumably these are the magnitudes of negative lower
+// bounds; the uses are not in this part of the file, so confirm the sign
+// convention at each use before relying on them.
+const int Hexagon_MEMW_OFFSET_MAX = 4095;
+const int Hexagon_MEMW_OFFSET_MIN = 4096;
+const int Hexagon_MEMD_OFFSET_MAX = 8191;
+const int Hexagon_MEMD_OFFSET_MIN = 8192;
+const int Hexagon_MEMH_OFFSET_MAX = 2047;
+const int Hexagon_MEMH_OFFSET_MIN = 2048;
+const int Hexagon_MEMB_OFFSET_MAX = 1023;
+const int Hexagon_MEMB_OFFSET_MIN = 1024;
+const int Hexagon_ADDI_OFFSET_MAX = 32767;
+const int Hexagon_ADDI_OFFSET_MIN = 32768;
+const int Hexagon_MEMD_AUTOINC_MAX = 56;
+const int Hexagon_MEMD_AUTOINC_MIN = 64;
+const int Hexagon_MEMW_AUTOINC_MAX = 28;
+const int Hexagon_MEMW_AUTOINC_MIN = 32;
+const int Hexagon_MEMH_AUTOINC_MAX = 14;
+const int Hexagon_MEMH_AUTOINC_MIN = 16;
+const int Hexagon_MEMB_AUTOINC_MAX = 7;
+const int Hexagon_MEMB_AUTOINC_MIN = 8;
+
+
+
+/// Construct the Hexagon instruction info for subtarget ST. The generated
+/// base class is given the ADJCALLSTACKDOWN / ADJCALLSTACKUP opcodes, the
+/// register info member RI is built from ST and this object, and ST is kept
+/// in the Subtarget member.
+HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST)
+ : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
+ RI(ST, *this), Subtarget(ST) {
+}
+
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot.  If
+/// not, return 0.  This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+///
+/// The operand layout checked here is the one loadRegFromStackSlot emits for
+/// these opcodes: operand 0 is the destination register, operand 1 is the
+/// frame index and operand 2 is the immediate offset, which must be 0 for
+/// the access to count as a direct slot load.
+unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+                                               int &FrameIndex) const {
+  switch (MI->getOpcode()) {
+  case Hexagon::LDriw:
+  case Hexagon::LDrid:
+  case Hexagon::LDrih:
+  case Hexagon::LDrib:
+  case Hexagon::LDriub:
+    if (MI->getOperand(1).isFI() &&
+        MI->getOperand(2).isImm() && (MI->getOperand(2).getImm() == 0)) {
+      FrameIndex = MI->getOperand(1).getIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+
+  default:
+    break;
+  }
+
+  return 0;
+}
+
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stack slot stored to.  If
+/// not, return 0.  This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+///
+/// The operand layout checked here is the one storeRegToStackSlot emits for
+/// these opcodes: operand 0 is the frame index, operand 1 is the immediate
+/// offset (must be 0 for a direct slot store) and operand 2 is the source
+/// register.
+unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+                                              int &FrameIndex) const {
+  switch (MI->getOpcode()) {
+  case Hexagon::STriw:
+  case Hexagon::STrid:
+  case Hexagon::STrih:
+  case Hexagon::STrib:
+    if (MI->getOperand(0).isFI() &&
+        MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
+      FrameIndex = MI->getOperand(0).getIndex();
+      return MI->getOperand(2).getReg();
+    }
+    break;
+
+  default:
+    break;
+  }
+
+  return 0;
+}
+
+
+/// InsertBranch - Append branch code to the end of MBB and return the number
+/// of instructions inserted (1 or 2).
+///
+/// \param TBB   Destination of the (possibly conditional) jump; must be
+///              non-null.
+/// \param FBB   If non-null, an unconditional JMP to FBB is emitted after
+///              the conditional jump to TBB.
+/// \param Cond  Empty for an unconditional jump.  Otherwise either
+///              { PredReg } (emit JMP_Pred) or { Imm(0), PredReg } (emit
+///              JMP_PredNot), which is the encoding AnalyzeBranch produces.
+unsigned
+HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
+                               MachineBasicBlock *FBB,
+                               const SmallVectorImpl<MachineOperand> &Cond,
+                               DebugLoc DL) const{
+
+  int BOpc   = Hexagon::JMP;
+  int BccOpc = Hexagon::JMP_Pred;
+
+  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+  int regPos = 0;
+  // Check if ReverseBranchCondition has asked to reverse this branch
+  // If we want to reverse the branch an odd number of times, we want
+  // JMP_PredNot.
+  if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) {
+    BccOpc = Hexagon::JMP_PredNot;
+    regPos = 1;
+  }
+
+  if (FBB == 0) {
+    if (Cond.empty()) {
+      // Due to a bug in TailMerging/CFG Optimization, we need to add a
+      // special case handling of a predicated jump followed by an
+      // unconditional jump. If not, Tail Merging and CFG Optimization go
+      // into an infinite loop.
+      //
+      // The outputs are initialized because AnalyzeBranch can report success
+      // on paths that never assign the target block, and NewTBB is compared
+      // below.
+      MachineBasicBlock *NewTBB = 0, *NewFBB = 0;
+      // Separate from the (empty) Cond parameter: receives the condition of
+      // the branch already present in MBB.
+      SmallVector<MachineOperand, 4> TermCond;
+      // getFirstTerminator() returns end() when the block has no terminator;
+      // that iterator must not be handed to isPredicated().
+      MachineBasicBlock::iterator Term = MBB.getFirstTerminator();
+      if (Term != MBB.end() && isPredicated(Term) &&
+          !AnalyzeBranch(MBB, NewTBB, NewFBB, TermCond, false)) {
+        MachineBasicBlock *NextBB =
+          llvm::next(MachineFunction::iterator(&MBB));
+        if (NewTBB == NextBB) {
+          // The existing predicated jump targets the layout successor:
+          // invert it so that it jumps to TBB instead and the successor is
+          // reached by falling through.
+          ReverseBranchCondition(TermCond);
+          RemoveBranch(MBB);
+          return InsertBranch(MBB, TBB, 0, TermCond, DL);
+        }
+      }
+      BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
+    } else {
+      BuildMI(&MBB, DL,
+              get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
+    }
+    return 1;
+  }
+
+  // Two-way branch: conditional jump to TBB, then unconditional jump to FBB.
+  BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
+  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
+
+  return 2;
+}
+
+
+/// AnalyzeBranch - Inspect the terminators at the end of MBB.
+///
+/// Returns false when the block ending was understood and true when it was
+/// not.  On a false return:
+///   - TBB == NULL, FBB == NULL: the block falls through.
+///   - TBB set, Cond empty:      unconditional JMP to TBB.
+///   - TBB set, Cond non-empty:  conditional jump to TBB; FBB is the target
+///                               of a following JMP, or NULL on fall-through.
+/// Cond is { PredReg } for JMP_Pred/BRCOND and { Imm(0), PredReg } for
+/// JMP_PredNot.  Entries are appended to Cond; it is not cleared here.
+/// When AllowModify is true, a JMP that directly follows another JMP is
+/// erased.
+bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+                                     MachineBasicBlock *&TBB,
+                                     MachineBasicBlock *&FBB,
+                                     SmallVectorImpl<MachineOperand> &Cond,
+                                     bool AllowModify) const {
+  // Reset both outputs so that the fall-through paths below, which return
+  // false without touching them, report null targets to the caller.
+  TBB = NULL;
+  FBB = NULL;
+
+  // If the block has no terminators, it just falls into the block after it.
+  MachineBasicBlock::iterator I = MBB.end();
+  if (I == MBB.begin())
+    return false;
+
+  // A basic block may look like this:
+  //
+  //  [   insn
+  //     EH_LABEL
+  //      insn
+  //      insn
+  //      insn
+  //     EH_LABEL
+  //      insn     ]
+  //
+  // It has two succs but does not have a terminator.
+  // Don't know how to handle it.
+  do {
+    --I;
+    if (I->isEHLabel())
+      return true;
+  } while (I != MBB.begin());
+
+  I = MBB.end();
+  --I;
+
+  // Skip trailing debug values; a block made only of them falls through.
+  while (I->isDebugValue()) {
+    if (I == MBB.begin())
+      return false;
+    --I;
+  }
+  if (!isUnpredicatedTerminator(I))
+    return false;
+
+  // Get the last instruction in the block.
+  MachineInstr *LastInst = I;
+
+  // If there is only one terminator instruction, process it.
+  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+    if (LastInst->getOpcode() == Hexagon::JMP) {
+      TBB = LastInst->getOperand(0).getMBB();
+      return false;
+    }
+    if (LastInst->getOpcode() == Hexagon::JMP_Pred) {
+      // Block ends with fall-through true condbranch.
+      TBB = LastInst->getOperand(1).getMBB();
+      Cond.push_back(LastInst->getOperand(0));
+      return false;
+    }
+    if (LastInst->getOpcode() == Hexagon::JMP_PredNot) {
+      // Block ends with fall-through false condbranch.
+      TBB = LastInst->getOperand(1).getMBB();
+      Cond.push_back(MachineOperand::CreateImm(0));
+      Cond.push_back(LastInst->getOperand(0));
+      return false;
+    }
+    // Otherwise, don't know what this is.
+    return true;
+  }
+
+  // Get the instruction before it if it's a terminator.
+  MachineInstr *SecondLastInst = I;
+
+  // If there are three terminators, we don't know what sort of block this is.
+  if (SecondLastInst && I != MBB.begin() &&
+      isUnpredicatedTerminator(--I))
+    return true;
+
+  // If the block ends with Hexagon::BRCOND (or Hexagon::JMP_Pred) and
+  // Hexagon::JMP, handle it.
+  if (((SecondLastInst->getOpcode() == Hexagon::BRCOND) ||
+      (SecondLastInst->getOpcode() == Hexagon::JMP_Pred)) &&
+      LastInst->getOpcode() == Hexagon::JMP) {
+    TBB =  SecondLastInst->getOperand(1).getMBB();
+    Cond.push_back(SecondLastInst->getOperand(0));
+    FBB = LastInst->getOperand(0).getMBB();
+    return false;
+  }
+
+  // If the block ends with Hexagon::JMP_PredNot and Hexagon::JMP, handle it.
+  if ((SecondLastInst->getOpcode() == Hexagon::JMP_PredNot) &&
+      LastInst->getOpcode() == Hexagon::JMP) {
+    TBB =  SecondLastInst->getOperand(1).getMBB();
+    Cond.push_back(MachineOperand::CreateImm(0));
+    Cond.push_back(SecondLastInst->getOperand(0));
+    FBB = LastInst->getOperand(0).getMBB();
+    return false;
+  }
+
+  // If the block ends with two Hexagon::JMPs, handle it.  The second one is
+  // not executed, so remove it.
+  if (SecondLastInst->getOpcode() == Hexagon::JMP &&
+      LastInst->getOpcode() == Hexagon::JMP) {
+    TBB = SecondLastInst->getOperand(0).getMBB();
+    I = LastInst;
+    if (AllowModify)
+      I->eraseFromParent();
+    return false;
+  }
+
+  // Otherwise, can't handle this.
+  return true;
+}
+
+
+/// RemoveBranch - Erase the jump instructions at the very end of MBB and
+/// return how many were erased (0, 1 or 2).  The last instruction is erased
+/// if it is a JMP, JMP_Pred or JMP_PredNot; the instruction that then ends
+/// the block is erased too if it is a JMP_Pred or JMP_PredNot.
+unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+  // Nothing to remove from an empty block.
+  MachineBasicBlock::iterator Last = MBB.end();
+  if (Last == MBB.begin())
+    return 0;
+
+  // The final instruction has to be one of the three jump forms.
+  --Last;
+  switch (Last->getOpcode()) {
+  case Hexagon::JMP:
+  case Hexagon::JMP_Pred:
+  case Hexagon::JMP_PredNot:
+    break;
+  default:
+    return 0;
+  }
+  Last->eraseFromParent();
+
+  // Re-examine the new end of the block; only a conditional jump is removed.
+  MachineBasicBlock::iterator Prev = MBB.end();
+  if (Prev == MBB.begin())
+    return 1;
+  --Prev;
+  const int PrevOpc = Prev->getOpcode();
+  if (PrevOpc != Hexagon::JMP_Pred && PrevOpc != Hexagon::JMP_PredNot)
+    return 1;
+
+  Prev->eraseFromParent();
+  return 2;
+}
+
+
+/// copyPhysReg - Emit, before I, the instruction(s) that copy SrcReg into
+/// DestReg.  Handled combinations:
+///   IntRegs    <- IntRegs     TFR
+///   DoubleRegs <- DoubleRegs  TFR_64
+///   PredRegs   <- PredRegs    OR_pp (Pd = or(Ps, Ps))
+///   DoubleRegs <- IntRegs     low half = SrcReg, high half = 0
+///   CRRegs     <- CRRegs      TFCR
+/// Any other combination asserts.  KillSrc is not used.
+void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator I, DebugLoc DL,
+                                 unsigned DestReg, unsigned SrcReg,
+                                 bool KillSrc) const {
+  if (Hexagon::IntRegsRegClass.contains(SrcReg, DestReg)) {
+    BuildMI(MBB, I, DL, get(Hexagon::TFR), DestReg).addReg(SrcReg);
+    return;
+  }
+  if (Hexagon::DoubleRegsRegClass.contains(SrcReg, DestReg)) {
+    BuildMI(MBB, I, DL, get(Hexagon::TFR_64), DestReg).addReg(SrcReg);
+    return;
+  }
+  if (Hexagon::PredRegsRegClass.contains(SrcReg, DestReg)) {
+    // Map Pd = Ps to Pd = or(Ps, Ps).
+    BuildMI(MBB, I, DL, get(Hexagon::OR_pp),
+            DestReg).addReg(SrcReg).addReg(SrcReg);
+    return;
+  }
+  // Widening copy from a single register into a register pair.  This has to
+  // test the two registers against different classes; testing both against
+  // DoubleRegs would repeat the TFR_64 case above and never be reached.
+  if (Hexagon::DoubleRegsRegClass.contains(DestReg) &&
+      Hexagon::IntRegsRegClass.contains(SrcReg)) {
+    // We can have an overlap between single and double reg: r1:0 = r0.
+    if(SrcReg == RI.getSubReg(DestReg, Hexagon::subreg_loreg)) {
+      // r1:0 = r0 -- the low half already holds the value; clear the high
+      // half only.
+      BuildMI(MBB, I, DL, get(Hexagon::TFRI), (RI.getSubReg(DestReg,
+              Hexagon::subreg_hireg))).addImm(0);
+    } else {
+      // r1:0 = r1 or no overlap.  The low half is written first so that a
+      // source living in the high half is read before it is cleared.
+      BuildMI(MBB, I, DL, get(Hexagon::TFR), (RI.getSubReg(DestReg,
+              Hexagon::subreg_loreg))).addReg(SrcReg);
+      BuildMI(MBB, I, DL, get(Hexagon::TFRI), (RI.getSubReg(DestReg,
+              Hexagon::subreg_hireg))).addImm(0);
+    }
+    return;
+  }
+  if (Hexagon::CRRegsRegClass.contains(DestReg, SrcReg)) {
+    BuildMI(MBB, I, DL, get(Hexagon::TFCR), DestReg).addReg(SrcReg);
+    return;
+  }
+
+  assert (0 && "Unimplemented");
+}
+
+
+/// storeRegToStackSlot - Emit, before I, a store of SrcReg to stack slot FI.
+/// The opcode depends on the register class RC: STriw for IntRegs (or a
+/// sub-class), STrid for DoubleRegs, STriw_pred for PredRegs; any other
+/// class asserts.  The emitted operands are frame index, immediate 0 and the
+/// source register (marked killed when isKill is set), plus a memory operand
+/// describing the fixed-stack store.  TRI is not used.
+void HexagonInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                    unsigned SrcReg, bool isKill, int FI,
+                    const TargetRegisterClass *RC,
+                    const TargetRegisterInfo *TRI) const {
+  DebugLoc DL = MBB.findDebugLoc(I);
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = *MF.getFrameInfo();
+  unsigned Align = MFI.getObjectAlignment(FI);
+
+  // Describe the memory access: a store of the whole slot.
+  MachineMemOperand *MMO = MF.getMachineMemOperand(
+      MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+      MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
+
+  // Select the store opcode from the register class.
+  unsigned StoreOpc;
+  if (Hexagon::IntRegsRegisterClass->hasSubClassEq(RC)) {
+    StoreOpc = Hexagon::STriw;
+  } else if (Hexagon::DoubleRegsRegisterClass->hasSubClassEq(RC)) {
+    StoreOpc = Hexagon::STrid;
+  } else if (Hexagon::PredRegsRegisterClass->hasSubClassEq(RC)) {
+    StoreOpc = Hexagon::STriw_pred;
+  } else {
+    assert(0 && "Unimplemented");
+    return;
+  }
+
+  BuildMI(MBB, I, DL, get(StoreOpc))
+      .addFrameIndex(FI)
+      .addImm(0)
+      .addReg(SrcReg, getKillRegState(isKill))
+      .addMemOperand(MMO);
+}
+
+
+/// storeRegToAddr - Not implemented for Hexagon; asserts when called and
+/// otherwise does nothing with its arguments.
+void HexagonInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+                                      bool isKill,
+                                      SmallVectorImpl<MachineOperand> &Addr,
+                                      const TargetRegisterClass *RC,
+                                 SmallVectorImpl<MachineInstr*> &NewMIs) const {
+  assert(0 && "Unimplemented");
+}
+
+
+/// loadRegFromStackSlot - Emit, before I, a load of stack slot FI into
+/// DestReg.  The opcode depends on the register class RC: LDriw for IntRegs
+/// (or a sub-class), LDrid for DoubleRegs, LDriw_pred for PredRegs; any
+/// other class asserts.  The emitted operands are the destination register,
+/// the frame index and immediate 0, plus a memory operand describing the
+/// fixed-stack load.  TRI is not used.
+void HexagonInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                     unsigned DestReg, int FI,
+                     const TargetRegisterClass *RC,
+                     const TargetRegisterInfo *TRI) const {
+  DebugLoc DL = MBB.findDebugLoc(I);
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = *MF.getFrameInfo();
+  unsigned Align = MFI.getObjectAlignment(FI);
+
+  MachineMemOperand *MMO =
+      MF.getMachineMemOperand(
+                      MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+                      MachineMemOperand::MOLoad,
+                      MFI.getObjectSize(FI),
+                      Align);
+
+  // Use hasSubClassEq, as storeRegToStackSlot does, so that a register
+  // spilled with a sub-class of one of these classes can be reloaded too.
+  if (Hexagon::IntRegsRegisterClass->hasSubClassEq(RC)) {
+    BuildMI(MBB, I, DL, get(Hexagon::LDriw), DestReg)
+          .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+  } else if (Hexagon::DoubleRegsRegisterClass->hasSubClassEq(RC)) {
+    BuildMI(MBB, I, DL, get(Hexagon::LDrid), DestReg)
+          .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+  } else if (Hexagon::PredRegsRegisterClass->hasSubClassEq(RC)) {
+    BuildMI(MBB, I, DL, get(Hexagon::LDriw_pred), DestReg)
+          .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+  } else {
+    assert(0 && "Can't load this register from stack slot");
+  }
+}
+
+
+/// loadRegFromAddr - Not implemented for Hexagon; asserts when called and
+/// otherwise does nothing with its arguments.
+void
+HexagonInstrInfo::loadRegFromAddr(MachineFunction &MF,
+                                  unsigned DestReg,
+                                  SmallVectorImpl<MachineOperand> &Addr,
+                                  const TargetRegisterClass *RC,
+                                  SmallVectorImpl<MachineInstr*> &NewMIs)
+                                  const {
+  assert(0 && "Unimplemented");
+}
+
+
+/// foldMemoryOperandImpl - Folding is not implemented for Hexagon yet; a
+/// null instruction pointer is always returned and no argument is examined.
+MachineInstr *
+HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+                                        MachineInstr* MI,
+                                        const SmallVectorImpl<unsigned> &Ops,
+                                        int FI) const {
+  // Hexagon_TODO: Implement.
+  return 0;
+}
+
+
+/// createVR - Create a new virtual register in MF whose register class is
+/// chosen from the value type VT: i1 -> PredRegs, i32 -> IntRegs,
+/// i64 -> DoubleRegs.  Returns the new register number.  Any other value
+/// type is a programming error.
+unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const {
+
+  MachineRegisterInfo &RegInfo = MF->getRegInfo();
+  const TargetRegisterClass *TRC = 0;
+  if (VT == MVT::i1) {
+    TRC =  Hexagon::PredRegsRegisterClass;
+  } else if (VT == MVT::i32) {
+    TRC =  Hexagon::IntRegsRegisterClass;
+  } else if (VT == MVT::i64) {
+    TRC =  Hexagon::DoubleRegsRegisterClass;
+  } else {
+    // A plain assert would compile away in release builds and leave TRC
+    // unset when it is passed to createVirtualRegister below.
+    llvm_unreachable("Cannot handle this register class");
+  }
+
+  unsigned NewReg = RegInfo.createVirtualRegister(TRC);
+  return NewReg;
+}
+
+
+/// isPredicable - Return true if MI can be turned into its predicated form.
+///
+/// The instruction description must mark MI as predicable.  On top of that,
+/// opcodes with an immediate operand are accepted only when the immediate
+/// passes the range check made below for that opcode, the ASLH/ASRH/SXTB/
+/// SXTH/ZXTB/ZXTH group is accepted only when the subtarget's architecture
+/// version is V4, and JMPR is never accepted.  Every other predicable opcode
+/// is accepted.
+bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
+  bool isPred = MI->getDesc().isPredicable();
+
+  if (!isPred)
+    return false;
+
+  const int Opc = MI->getOpcode();
+
+  switch(Opc) {
+  case Hexagon::TFRI:
+    return isInt<12>(MI->getOperand(1).getImm());
+
+  // Stores: the offset is operand 1.
+  case Hexagon::STrid:
+  case Hexagon::STrid_indexed:
+    return isShiftedUInt<6,3>(MI->getOperand(1).getImm());
+
+  case Hexagon::STriw:
+  case Hexagon::STriw_indexed:
+  case Hexagon::STriw_nv_V4:
+    return isShiftedUInt<6,2>(MI->getOperand(1).getImm());
+
+  case Hexagon::STrih:
+  case Hexagon::STrih_indexed:
+  case Hexagon::STrih_nv_V4:
+    return isShiftedUInt<6,1>(MI->getOperand(1).getImm());
+
+  case Hexagon::STrib:
+  case Hexagon::STrib_indexed:
+  case Hexagon::STrib_nv_V4:
+    return isUInt<6>(MI->getOperand(1).getImm());
+
+  // Loads: the offset is operand 2.
+  case Hexagon::LDrid:
+  case Hexagon::LDrid_indexed:
+    return isShiftedUInt<6,3>(MI->getOperand(2).getImm());
+
+  case Hexagon::LDriw:
+  case Hexagon::LDriw_indexed:
+    return isShiftedUInt<6,2>(MI->getOperand(2).getImm());
+
+  case Hexagon::LDrih:
+  case Hexagon::LDriuh:
+  case Hexagon::LDrih_indexed:
+  case Hexagon::LDriuh_indexed:
+    return isShiftedUInt<6,1>(MI->getOperand(2).getImm());
+
+  case Hexagon::LDrib:
+  case Hexagon::LDriub:
+  case Hexagon::LDrib_indexed:
+  case Hexagon::LDriub_indexed:
+    return isUInt<6>(MI->getOperand(2).getImm());
+
+  // Post-increment loads: the increment is operand 3.
+  case Hexagon::POST_LDrid:
+    return isShiftedInt<4,3>(MI->getOperand(3).getImm());
+
+  case Hexagon::POST_LDriw:
+    return isShiftedInt<4,2>(MI->getOperand(3).getImm());
+
+  case Hexagon::POST_LDrih:
+  case Hexagon::POST_LDriuh:
+    return isShiftedInt<4,1>(MI->getOperand(3).getImm());
+
+  case Hexagon::POST_LDrib:
+  case Hexagon::POST_LDriub:
+    return isInt<4>(MI->getOperand(3).getImm());
+
+  // Store-immediate: both operand 1 and operand 2 are immediates.
+  case Hexagon::STrib_imm_V4:
+  case Hexagon::STrih_imm_V4:
+  case Hexagon::STriw_imm_V4:
+    return (isUInt<6>(MI->getOperand(1).getImm()) &&
+            isInt<6>(MI->getOperand(2).getImm()));
+
+  case Hexagon::ADD_ri:
+    return isInt<8>(MI->getOperand(2).getImm());
+
+  case Hexagon::ASLH:
+  case Hexagon::ASRH:
+  case Hexagon::SXTB:
+  case Hexagon::SXTH:
+  case Hexagon::ZXTB:
+  case Hexagon::ZXTH:
+    return Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4;
+
+  case Hexagon::JMPR:
+    return false;
+
+  default:
+    return true;
+  }
+}
+
+
+/// getMatchingCondBranchOpcode - Map the unpredicated opcode Opc to its
+/// predicated counterpart: the "_cPt" form (JMP_Pred for JMP) when
+/// invertPredicate is false and the "_cNotPt" form (JMP_PredNot for JMP)
+/// when it is true.  Opcodes not listed here are a programming error.
+int HexagonInstrInfo::
+getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
+  switch(Opc) {
+  case Hexagon::TFR:
+    return !invertPredicate ? Hexagon::TFR_cPt :
+                              Hexagon::TFR_cNotPt;
+  case Hexagon::TFRI:
+    return !invertPredicate ? Hexagon::TFRI_cPt :
+                              Hexagon::TFRI_cNotPt;
+  case Hexagon::JMP:
+    return !invertPredicate ? Hexagon::JMP_Pred :
+                              Hexagon::JMP_PredNot;
+  case Hexagon::ADD_ri:
+    return !invertPredicate ? Hexagon::ADD_ri_cPt :
+                              Hexagon::ADD_ri_cNotPt;
+  case Hexagon::ADD_rr:
+    return !invertPredicate ? Hexagon::ADD_rr_cPt :
+                              Hexagon::ADD_rr_cNotPt;
+  case Hexagon::XOR_rr:
+    return !invertPredicate ? Hexagon::XOR_rr_cPt :
+                              Hexagon::XOR_rr_cNotPt;
+  case Hexagon::AND_rr:
+    return !invertPredicate ? Hexagon::AND_rr_cPt :
+                              Hexagon::AND_rr_cNotPt;
+  case Hexagon::OR_rr:
+    return !invertPredicate ? Hexagon::OR_rr_cPt :
+                              Hexagon::OR_rr_cNotPt;
+  case Hexagon::SUB_rr:
+    return !invertPredicate ? Hexagon::SUB_rr_cPt :
+                              Hexagon::SUB_rr_cNotPt;
+  case Hexagon::COMBINE_rr:
+    return !invertPredicate ? Hexagon::COMBINE_rr_cPt :
+                              Hexagon::COMBINE_rr_cNotPt;
+  case Hexagon::ASLH:
+    return !invertPredicate ? Hexagon::ASLH_cPt_V4 :
+                              Hexagon::ASLH_cNotPt_V4;
+  case Hexagon::ASRH:
+    return !invertPredicate ? Hexagon::ASRH_cPt_V4 :
+                              Hexagon::ASRH_cNotPt_V4;
+  case Hexagon::SXTB:
+    return !invertPredicate ? Hexagon::SXTB_cPt_V4 :
+                              Hexagon::SXTB_cNotPt_V4;
+  case Hexagon::SXTH:
+    return !invertPredicate ? Hexagon::SXTH_cPt_V4 :
+                              Hexagon::SXTH_cNotPt_V4;
+  case Hexagon::ZXTB:
+    return !invertPredicate ? Hexagon::ZXTB_cPt_V4 :
+                              Hexagon::ZXTB_cNotPt_V4;
+  case Hexagon::ZXTH:
+    return !invertPredicate ? Hexagon::ZXTH_cPt_V4 :
+                              Hexagon::ZXTH_cNotPt_V4;
+
+  case Hexagon::JMPR:
+    return !invertPredicate ? Hexagon::JMPR_cPt :
+                              Hexagon::JMPR_cNotPt;
+
+  // V4 indexed+scaled load.
+  // Note: each "_ae_" opcode maps to the same predicated opcode as its
+  // non-"_ae_" unsigned counterpart.
+  case Hexagon::LDrid_indexed_V4:
+    return !invertPredicate ? Hexagon::LDrid_indexed_cPt_V4 :
+                              Hexagon::LDrid_indexed_cNotPt_V4;
+  case Hexagon::LDrid_indexed_shl_V4:
+    return !invertPredicate ? Hexagon::LDrid_indexed_shl_cPt_V4 :
+                              Hexagon::LDrid_indexed_shl_cNotPt_V4;
+  case Hexagon::LDrib_indexed_V4:
+    return !invertPredicate ? Hexagon::LDrib_indexed_cPt_V4 :
+                              Hexagon::LDrib_indexed_cNotPt_V4;
+  case Hexagon::LDriub_indexed_V4:
+    return !invertPredicate ? Hexagon::LDriub_indexed_cPt_V4 :
+                              Hexagon::LDriub_indexed_cNotPt_V4;
+  case Hexagon::LDriub_ae_indexed_V4:
+    return !invertPredicate ? Hexagon::LDriub_indexed_cPt_V4 :
+                              Hexagon::LDriub_indexed_cNotPt_V4;
+  case Hexagon::LDrib_indexed_shl_V4:
+    return !invertPredicate ? Hexagon::LDrib_indexed_shl_cPt_V4 :
+                              Hexagon::LDrib_indexed_shl_cNotPt_V4;
+  case Hexagon::LDriub_indexed_shl_V4:
+    return !invertPredicate ? Hexagon::LDriub_indexed_shl_cPt_V4 :
+                              Hexagon::LDriub_indexed_shl_cNotPt_V4;
+  case Hexagon::LDriub_ae_indexed_shl_V4:
+    return !invertPredicate ? Hexagon::LDriub_indexed_shl_cPt_V4 :
+                              Hexagon::LDriub_indexed_shl_cNotPt_V4;
+  case Hexagon::LDrih_indexed_V4:
+    return !invertPredicate ? Hexagon::LDrih_indexed_cPt_V4 :
+                              Hexagon::LDrih_indexed_cNotPt_V4;
+  case Hexagon::LDriuh_indexed_V4:
+    return !invertPredicate ? Hexagon::LDriuh_indexed_cPt_V4 :
+                              Hexagon::LDriuh_indexed_cNotPt_V4;
+  case Hexagon::LDriuh_ae_indexed_V4:
+    return !invertPredicate ? Hexagon::LDriuh_indexed_cPt_V4 :
+                              Hexagon::LDriuh_indexed_cNotPt_V4;
+  case Hexagon::LDrih_indexed_shl_V4:
+    return !invertPredicate ? Hexagon::LDrih_indexed_shl_cPt_V4 :
+                              Hexagon::LDrih_indexed_shl_cNotPt_V4;
+  case Hexagon::LDriuh_indexed_shl_V4:
+    return !invertPredicate ? Hexagon::LDriuh_indexed_shl_cPt_V4 :
+                              Hexagon::LDriuh_indexed_shl_cNotPt_V4;
+  case Hexagon::LDriuh_ae_indexed_shl_V4:
+    return !invertPredicate ? Hexagon::LDriuh_indexed_shl_cPt_V4 :
+                              Hexagon::LDriuh_indexed_shl_cNotPt_V4;
+  case Hexagon::LDriw_indexed_V4:
+    return !invertPredicate ? Hexagon::LDriw_indexed_cPt_V4 :
+                              Hexagon::LDriw_indexed_cNotPt_V4;
+  case Hexagon::LDriw_indexed_shl_V4:
+    return !invertPredicate ? Hexagon::LDriw_indexed_shl_cPt_V4 :
+                              Hexagon::LDriw_indexed_shl_cNotPt_V4;
+  // Byte.
+  case Hexagon::POST_STbri:
+    return !invertPredicate ? Hexagon::POST_STbri_cPt :
+                              Hexagon::POST_STbri_cNotPt;
+  case Hexagon::STrib:
+    return !invertPredicate ? Hexagon::STrib_cPt :
+                              Hexagon::STrib_cNotPt;
+  case Hexagon::STrib_indexed:
+    return !invertPredicate ? Hexagon::STrib_indexed_cPt :
+                              Hexagon::STrib_indexed_cNotPt;
+  case Hexagon::STrib_imm_V4:
+    return !invertPredicate ? Hexagon::STrib_imm_cPt_V4 :
+                              Hexagon::STrib_imm_cNotPt_V4;
+  case Hexagon::STrib_indexed_shl_V4:
+    return !invertPredicate ? Hexagon::STrib_indexed_shl_cPt_V4 :
+                              Hexagon::STrib_indexed_shl_cNotPt_V4;
+  // Halfword.
+  case Hexagon::POST_SThri:
+    return !invertPredicate ? Hexagon::POST_SThri_cPt :
+                              Hexagon::POST_SThri_cNotPt;
+  case Hexagon::STrih:
+    return !invertPredicate ? Hexagon::STrih_cPt :
+                              Hexagon::STrih_cNotPt;
+  case Hexagon::STrih_indexed:
+    return !invertPredicate ? Hexagon::STrih_indexed_cPt :
+                              Hexagon::STrih_indexed_cNotPt;
+  case Hexagon::STrih_imm_V4:
+    return !invertPredicate ? Hexagon::STrih_imm_cPt_V4 :
+                              Hexagon::STrih_imm_cNotPt_V4;
+  case Hexagon::STrih_indexed_shl_V4:
+    return !invertPredicate ? Hexagon::STrih_indexed_shl_cPt_V4 :
+                              Hexagon::STrih_indexed_shl_cNotPt_V4;
+  // Word.
+  case Hexagon::POST_STwri:
+    return !invertPredicate ? Hexagon::POST_STwri_cPt :
+                              Hexagon::POST_STwri_cNotPt;
+  case Hexagon::STriw:
+    return !invertPredicate ? Hexagon::STriw_cPt :
+                              Hexagon::STriw_cNotPt;
+  case Hexagon::STriw_indexed:
+    return !invertPredicate ? Hexagon::STriw_indexed_cPt :
+                              Hexagon::STriw_indexed_cNotPt;
+  case Hexagon::STriw_indexed_shl_V4:
+    return !invertPredicate ? Hexagon::STriw_indexed_shl_cPt_V4 :
+                              Hexagon::STriw_indexed_shl_cNotPt_V4;
+  case Hexagon::STriw_imm_V4:
+    return !invertPredicate ? Hexagon::STriw_imm_cPt_V4 :
+                              Hexagon::STriw_imm_cNotPt_V4;
+  // Double word.
+  case Hexagon::POST_STdri:
+    return !invertPredicate ? Hexagon::POST_STdri_cPt :
+                              Hexagon::POST_STdri_cNotPt;
+  case Hexagon::STrid:
+    return !invertPredicate ? Hexagon::STrid_cPt :
+                              Hexagon::STrid_cNotPt;
+  case Hexagon::STrid_indexed:
+    return !invertPredicate ? Hexagon::STrid_indexed_cPt :
+                              Hexagon::STrid_indexed_cNotPt;
+  case Hexagon::STrid_indexed_shl_V4:
+    return !invertPredicate ? Hexagon::STrid_indexed_shl_cPt_V4 :
+                              Hexagon::STrid_indexed_shl_cNotPt_V4;
+  // Load.
+  case Hexagon::LDrid:
+    return !invertPredicate ? Hexagon::LDrid_cPt :
+                              Hexagon::LDrid_cNotPt;
+  case Hexagon::LDriw:
+    return !invertPredicate ? Hexagon::LDriw_cPt :
+                              Hexagon::LDriw_cNotPt;
+  case Hexagon::LDrih:
+    return !invertPredicate ? Hexagon::LDrih_cPt :
+                              Hexagon::LDrih_cNotPt;
+  case Hexagon::LDriuh:
+    return !invertPredicate ? Hexagon::LDriuh_cPt :
+                              Hexagon::LDriuh_cNotPt;
+  case Hexagon::LDrib:
+    return !invertPredicate ? Hexagon::LDrib_cPt :
+                              Hexagon::LDrib_cNotPt;
+  case Hexagon::LDriub:
+    return !invertPredicate ? Hexagon::LDriub_cPt :
+                              Hexagon::LDriub_cNotPt;
+  case Hexagon::LDriubit:
+    return !invertPredicate ? Hexagon::LDriub_cPt :
+                              Hexagon::LDriub_cNotPt;
+  // Load Indexed.
+  case Hexagon::LDrid_indexed:
+    return !invertPredicate ? Hexagon::LDrid_indexed_cPt :
+                              Hexagon::LDrid_indexed_cNotPt;
+  case Hexagon::LDriw_indexed:
+    return !invertPredicate ? Hexagon::LDriw_indexed_cPt :
+                              Hexagon::LDriw_indexed_cNotPt;
+  case Hexagon::LDrih_indexed:
+    return !invertPredicate ? Hexagon::LDrih_indexed_cPt :
+                              Hexagon::LDrih_indexed_cNotPt;
+  case Hexagon::LDriuh_indexed:
+    return !invertPredicate ? Hexagon::LDriuh_indexed_cPt :
+                              Hexagon::LDriuh_indexed_cNotPt;
+  case Hexagon::LDrib_indexed:
+    return !invertPredicate ? Hexagon::LDrib_indexed_cPt :
+                              Hexagon::LDrib_indexed_cNotPt;
+  case Hexagon::LDriub_indexed:
+    return !invertPredicate ? Hexagon::LDriub_indexed_cPt :
+                              Hexagon::LDriub_indexed_cNotPt;
+  // Post Increment Load.
+  case Hexagon::POST_LDrid:
+    return !invertPredicate ? Hexagon::POST_LDrid_cPt :
+                              Hexagon::POST_LDrid_cNotPt;
+  case Hexagon::POST_LDriw:
+    return !invertPredicate ? Hexagon::POST_LDriw_cPt :
+                              Hexagon::POST_LDriw_cNotPt;
+  case Hexagon::POST_LDrih:
+    return !invertPredicate ? Hexagon::POST_LDrih_cPt :
+                              Hexagon::POST_LDrih_cNotPt;
+  case Hexagon::POST_LDriuh:
+    return !invertPredicate ? Hexagon::POST_LDriuh_cPt :
+                              Hexagon::POST_LDriuh_cNotPt;
+  case Hexagon::POST_LDrib:
+    return !invertPredicate ? Hexagon::POST_LDrib_cPt :
+                              Hexagon::POST_LDrib_cNotPt;
+  case Hexagon::POST_LDriub:
+    return !invertPredicate ? Hexagon::POST_LDriub_cPt :
+                              Hexagon::POST_LDriub_cNotPt;
+  // DEALLOC_RETURN.
+  case Hexagon::DEALLOC_RET_V4:
+    return !invertPredicate ? Hexagon::DEALLOC_RET_cPt_V4 :
+                              Hexagon::DEALLOC_RET_cNotPt_V4;
+  default:
+    // A bare assert here would compile away in release builds and let
+    // control run off the end of this value-returning function.
+    llvm_unreachable("Unexpected predicable instruction");
+  }
+}
+
+
+/// PredicateInstruction - Rewrite MI in place into its predicated form and
+/// return true.  The opcode is replaced through getMatchingCondBranchOpcode,
+/// the existing input operands are shifted up by one position, and the
+/// predicate register taken from Cond is written into the freed position.
+/// Cond is { PredReg }, or { Imm(0), PredReg } to request the inverted form.
+/// NOTE(review): Cond[regPos] is read without an emptiness check, so this
+/// presumably relies on callers never passing an empty Cond -- confirm.
+bool HexagonInstrInfo::
+PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+ int Opc = MI->getOpcode();
+ assert (isPredicable(MI) && "Expected predicable instruction");
+ // A leading immediate 0 in Cond selects the inverted ("Not") opcode.
+ bool invertJump = (!Cond.empty() && Cond[0].isImm() &&
+ (Cond[0].getImm() == 0));
+ MI->setDesc(get(getMatchingCondBranchOpcode(Opc, invertJump)));
+ //
+ // This assumes that the predicate is always the first operand
+ // in the set of inputs.
+ //
+ // Grow the operand list by one by appending a copy of the last operand.
+ MI->addOperand(MI->getOperand(MI->getNumOperands()-1));
+ int oper;
+ // Walk backwards from the operand just before the original last one,
+ // copying each operand into the slot above it. The walk stops at the first
+ // register operand that is an explicit non-use; if none is found, oper
+ // ends at -1.
+ for (oper = MI->getNumOperands() - 3; oper >= 0; --oper) {
+ MachineOperand MO = MI->getOperand(oper);
+ if ((MO.isReg() && !MO.isUse() && !MO.isImplicit())) {
+ break;
+ }
+
+ if (MO.isReg()) {
+ MI->getOperand(oper+1).ChangeToRegister(MO.getReg(), MO.isDef(),
+ MO.isImplicit(), MO.isKill(),
+ MO.isDead(), MO.isUndef(),
+ MO.isDebug());
+ } else if (MO.isImm()) {
+ MI->getOperand(oper+1).ChangeToImmediate(MO.getImm());
+ } else {
+ assert(false && "Unexpected operand type");
+ }
+ }
+
+ // Slot oper+1 is the position just above where the walk stopped; it
+ // receives the predicate register, which sits at index 1 of Cond for the
+ // inverted form and at index 0 otherwise.
+ int regPos = invertJump ? 1 : 0;
+ MachineOperand PredMO = Cond[regPos];
+ MI->getOperand(oper+1).ChangeToRegister(PredMO.getReg(), PredMO.isDef(),
+ PredMO.isImplicit(), PredMO.isKill(),
+ PredMO.isDead(), PredMO.isUndef(),
+ PredMO.isDebug());
+
+ return true;
+}
+
+
+/// isProfitableToIfCvt (single-block form) - Always reports if-conversion of
+/// MBB as profitable; the cycle counts and the branch probability are not
+/// consulted.
+bool HexagonInstrInfo::isProfitableToIfCvt(
+    MachineBasicBlock &MBB, unsigned NumCyles, unsigned ExtraPredCycles,
+    const BranchProbability &Probability) const {
+  return true;
+}
+
+
+/// isProfitableToIfCvt (two-block form) - Always reports if-conversion of
+/// the TMBB / FMBB pair as profitable; the cycle counts and the branch
+/// probability are not consulted.
+bool HexagonInstrInfo::isProfitableToIfCvt(
+    MachineBasicBlock &TMBB, unsigned NumTCycles, unsigned ExtraTCycles,
+    MachineBasicBlock &FMBB, unsigned NumFCycles, unsigned ExtraFCycles,
+    const BranchProbability &Probability) const {
+  return true;
+}
+
+
+/// isPredicated - Return true if MI's opcode is one of the predicated
+/// opcodes enumerated below, false for every other opcode. The decision is
+/// made on the opcode alone; no operand is inspected.
+bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ // Conditional register / immediate transfers.
+ case Hexagon::TFR_cPt:
+ case Hexagon::TFR_cNotPt:
+ case Hexagon::TFRI_cPt:
+ case Hexagon::TFRI_cNotPt:
+ case Hexagon::TFR_cdnPt:
+ case Hexagon::TFR_cdnNotPt:
+ case Hexagon::TFRI_cdnPt:
+ case Hexagon::TFRI_cdnNotPt:
+ return true;
+
+ // Conditional jumps.
+ case Hexagon::JMP_Pred:
+ case Hexagon::JMP_PredNot:
+ case Hexagon::BRCOND:
+ case Hexagon::JMP_PredPt:
+ case Hexagon::JMP_PredNotPt:
+ case Hexagon::JMP_PredPnt:
+ case Hexagon::JMP_PredNotPnt:
+ return true;
+
+ // V4 conditional indexed / indexed-shl loads.
+ case Hexagon::LDrid_indexed_cPt_V4 :
+ case Hexagon::LDrid_indexed_cdnPt_V4 :
+ case Hexagon::LDrid_indexed_cNotPt_V4 :
+ case Hexagon::LDrid_indexed_cdnNotPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDrib_indexed_cPt_V4 :
+ case Hexagon::LDrib_indexed_cdnPt_V4 :
+ case Hexagon::LDrib_indexed_cNotPt_V4 :
+ case Hexagon::LDrib_indexed_cdnNotPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriub_indexed_cPt_V4 :
+ case Hexagon::LDriub_indexed_cdnPt_V4 :
+ case Hexagon::LDriub_indexed_cNotPt_V4 :
+ case Hexagon::LDriub_indexed_cdnNotPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDrih_indexed_cPt_V4 :
+ case Hexagon::LDrih_indexed_cdnPt_V4 :
+ case Hexagon::LDrih_indexed_cNotPt_V4 :
+ case Hexagon::LDrih_indexed_cdnNotPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriuh_indexed_cPt_V4 :
+ case Hexagon::LDriuh_indexed_cdnPt_V4 :
+ case Hexagon::LDriuh_indexed_cNotPt_V4 :
+ case Hexagon::LDriuh_indexed_cdnNotPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriw_indexed_cPt_V4 :
+ case Hexagon::LDriw_indexed_cdnPt_V4 :
+ case Hexagon::LDriw_indexed_cNotPt_V4 :
+ case Hexagon::LDriw_indexed_cdnNotPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 :
+ return true;
+
+ // Conditional loads, "_cPt" / "_cNotPt" forms.
+ case Hexagon::LDrid_cPt :
+ case Hexagon::LDrid_cNotPt :
+ case Hexagon::LDrid_indexed_cPt :
+ case Hexagon::LDrid_indexed_cNotPt :
+ case Hexagon::POST_LDrid_cPt :
+ case Hexagon::POST_LDrid_cNotPt :
+ case Hexagon::LDriw_cPt :
+ case Hexagon::LDriw_cNotPt :
+ case Hexagon::LDriw_indexed_cPt :
+ case Hexagon::LDriw_indexed_cNotPt :
+ case Hexagon::POST_LDriw_cPt :
+ case Hexagon::POST_LDriw_cNotPt :
+ case Hexagon::LDrih_cPt :
+ case Hexagon::LDrih_cNotPt :
+ case Hexagon::LDrih_indexed_cPt :
+ case Hexagon::LDrih_indexed_cNotPt :
+ case Hexagon::POST_LDrih_cPt :
+ case Hexagon::POST_LDrih_cNotPt :
+ case Hexagon::LDrib_cPt :
+ case Hexagon::LDrib_cNotPt :
+ case Hexagon::LDrib_indexed_cPt :
+ case Hexagon::LDrib_indexed_cNotPt :
+ case Hexagon::POST_LDrib_cPt :
+ case Hexagon::POST_LDrib_cNotPt :
+ case Hexagon::LDriuh_cPt :
+ case Hexagon::LDriuh_cNotPt :
+ case Hexagon::LDriuh_indexed_cPt :
+ case Hexagon::LDriuh_indexed_cNotPt :
+ case Hexagon::POST_LDriuh_cPt :
+ case Hexagon::POST_LDriuh_cNotPt :
+ case Hexagon::LDriub_cPt :
+ case Hexagon::LDriub_cNotPt :
+ case Hexagon::LDriub_indexed_cPt :
+ case Hexagon::LDriub_indexed_cNotPt :
+ case Hexagon::POST_LDriub_cPt :
+ case Hexagon::POST_LDriub_cNotPt :
+ return true;
+
+ // Conditional loads, "_cdnPt" / "_cdnNotPt" forms.
+ case Hexagon::LDrid_cdnPt :
+ case Hexagon::LDrid_cdnNotPt :
+ case Hexagon::LDrid_indexed_cdnPt :
+ case Hexagon::LDrid_indexed_cdnNotPt :
+ case Hexagon::POST_LDrid_cdnPt_V4 :
+ case Hexagon::POST_LDrid_cdnNotPt_V4 :
+ case Hexagon::LDriw_cdnPt :
+ case Hexagon::LDriw_cdnNotPt :
+ case Hexagon::LDriw_indexed_cdnPt :
+ case Hexagon::LDriw_indexed_cdnNotPt :
+ case Hexagon::POST_LDriw_cdnPt_V4 :
+ case Hexagon::POST_LDriw_cdnNotPt_V4 :
+ case Hexagon::LDrih_cdnPt :
+ case Hexagon::LDrih_cdnNotPt :
+ case Hexagon::LDrih_indexed_cdnPt :
+ case Hexagon::LDrih_indexed_cdnNotPt :
+ case Hexagon::POST_LDrih_cdnPt_V4 :
+ case Hexagon::POST_LDrih_cdnNotPt_V4 :
+ case Hexagon::LDrib_cdnPt :
+ case Hexagon::LDrib_cdnNotPt :
+ case Hexagon::LDrib_indexed_cdnPt :
+ case Hexagon::LDrib_indexed_cdnNotPt :
+ case Hexagon::POST_LDrib_cdnPt_V4 :
+ case Hexagon::POST_LDrib_cdnNotPt_V4 :
+ case Hexagon::LDriuh_cdnPt :
+ case Hexagon::LDriuh_cdnNotPt :
+ case Hexagon::LDriuh_indexed_cdnPt :
+ case Hexagon::LDriuh_indexed_cdnNotPt :
+ case Hexagon::POST_LDriuh_cdnPt_V4 :
+ case Hexagon::POST_LDriuh_cdnNotPt_V4 :
+ case Hexagon::LDriub_cdnPt :
+ case Hexagon::LDriub_cdnNotPt :
+ case Hexagon::LDriub_indexed_cdnPt :
+ case Hexagon::LDriub_indexed_cdnNotPt :
+ case Hexagon::POST_LDriub_cdnPt_V4 :
+ case Hexagon::POST_LDriub_cdnNotPt_V4 :
+ return true;
+
+ // Conditional ALU operations.
+ case Hexagon::ADD_ri_cPt:
+ case Hexagon::ADD_ri_cNotPt:
+ case Hexagon::ADD_ri_cdnPt:
+ case Hexagon::ADD_ri_cdnNotPt:
+ case Hexagon::ADD_rr_cPt:
+ case Hexagon::ADD_rr_cNotPt:
+ case Hexagon::ADD_rr_cdnPt:
+ case Hexagon::ADD_rr_cdnNotPt:
+ case Hexagon::XOR_rr_cPt:
+ case Hexagon::XOR_rr_cNotPt:
+ case Hexagon::XOR_rr_cdnPt:
+ case Hexagon::XOR_rr_cdnNotPt:
+ case Hexagon::AND_rr_cPt:
+ case Hexagon::AND_rr_cNotPt:
+ case Hexagon::AND_rr_cdnPt:
+ case Hexagon::AND_rr_cdnNotPt:
+ case Hexagon::OR_rr_cPt:
+ case Hexagon::OR_rr_cNotPt:
+ case Hexagon::OR_rr_cdnPt:
+ case Hexagon::OR_rr_cdnNotPt:
+ case Hexagon::SUB_rr_cPt:
+ case Hexagon::SUB_rr_cNotPt:
+ case Hexagon::SUB_rr_cdnPt:
+ case Hexagon::SUB_rr_cdnNotPt:
+ case Hexagon::COMBINE_rr_cPt:
+ case Hexagon::COMBINE_rr_cNotPt:
+ case Hexagon::COMBINE_rr_cdnPt:
+ case Hexagon::COMBINE_rr_cdnNotPt:
+ return true;
+
+ // V4 conditional shift-halfword / extend operations, "_cPt" / "_cNotPt".
+ case Hexagon::ASLH_cPt_V4:
+ case Hexagon::ASLH_cNotPt_V4:
+ case Hexagon::ASRH_cPt_V4:
+ case Hexagon::ASRH_cNotPt_V4:
+ case Hexagon::SXTB_cPt_V4:
+ case Hexagon::SXTB_cNotPt_V4:
+ case Hexagon::SXTH_cPt_V4:
+ case Hexagon::SXTH_cNotPt_V4:
+ case Hexagon::ZXTB_cPt_V4:
+ case Hexagon::ZXTB_cNotPt_V4:
+ case Hexagon::ZXTH_cPt_V4:
+ case Hexagon::ZXTH_cNotPt_V4:
+ return true;
+
+ // Same operations, "_cdnPt" / "_cdnNotPt" forms.
+ case Hexagon::ASLH_cdnPt_V4:
+ case Hexagon::ASLH_cdnNotPt_V4:
+ case Hexagon::ASRH_cdnPt_V4:
+ case Hexagon::ASRH_cdnNotPt_V4:
+ case Hexagon::SXTB_cdnPt_V4:
+ case Hexagon::SXTB_cdnNotPt_V4:
+ case Hexagon::SXTH_cdnPt_V4:
+ case Hexagon::SXTH_cdnNotPt_V4:
+ case Hexagon::ZXTB_cdnPt_V4:
+ case Hexagon::ZXTB_cdnNotPt_V4:
+ case Hexagon::ZXTH_cdnPt_V4:
+ case Hexagon::ZXTH_cdnNotPt_V4:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+
+// Collect into Pred the first predicate-register def of MI, if there is one.
+bool HexagonInstrInfo::DefinesPredicate(MachineInstr *MI,
+                                 std::vector<MachineOperand> &Pred) const {
+  for (unsigned Idx = 0, NumOps = MI->getNumOperands(); Idx != NumOps; ++Idx) {
+    const MachineOperand &Def = MI->getOperand(Idx);
+    if (!Def.isReg() || !Def.isDef())
+      continue;
+    const TargetRegisterClass *DefRC = RI.getMinimalPhysRegClass(Def.getReg());
+    if (DefRC != Hexagon::PredRegsRegisterClass)
+      continue;
+    Pred.push_back(Def);
+    return true;
+  }
+  return false;
+}
+
+
+// TODO: Predicate subsumption is not analyzed yet, so this always reports
+// that Pred1 does not subsume Pred2.
+bool HexagonInstrInfo::SubsumesPredicate(
+    const SmallVectorImpl<MachineOperand> &Pred1,
+    const SmallVectorImpl<MachineOperand> &Pred2) const {
+  return false;
+}
+
+
+// A leading immediate 0 in Cond marks the branch as reversed, so reversing
+// toggles that marker: erase it when present, otherwise prepend it.
+// Always returns false.
+bool HexagonInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+  const bool AlreadyReversed =
+      !Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0;
+  if (AlreadyReversed)
+    Cond.erase(Cond.begin());
+  else
+    Cond.insert(Cond.begin(), MachineOperand::CreateImm(0));
+  return false;
+}
+
+
+// Duplicating a block for if-conversion is accepted up to 4 instructions.
+bool HexagonInstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+    unsigned NumInstrs, const BranchProbability &Probability) const {
+  return NumInstrs < 5;
+}
+
+// Reports whether MI is one of the V4 DEALLOC_RET opcodes: the plain form
+// or any of the suffixed variants compared against below. Every other
+// opcode yields false.
+bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const {
+  const int Opc = MI->getOpcode();
+  return Opc == Hexagon::DEALLOC_RET_V4 ||
+         Opc == Hexagon::DEALLOC_RET_cPt_V4 ||
+         Opc == Hexagon::DEALLOC_RET_cNotPt_V4 ||
+         Opc == Hexagon::DEALLOC_RET_cdnPnt_V4 ||
+         Opc == Hexagon::DEALLOC_RET_cNotdnPnt_V4 ||
+         Opc == Hexagon::DEALLOC_RET_cdnPt_V4 ||
+         Opc == Hexagon::DEALLOC_RET_cNotdnPt_V4;
+}
+
+
+bool HexagonInstrInfo::
+isValidOffset(const int Opcode, const int Offset) const {
+  // Returns true when "Offset" is in the correct range of the given "Opcode".
+  // If it is not, "ADD_ri" is inserted to calculate the final address; for
+  // this reason the function assumes that "Offset" has correct alignment.
+  // An opcode without a defined range asserts and is reported as invalid.
+
+  switch(Opcode) {
+  case Hexagon::LDriw:
+  case Hexagon::STriw:
+  case Hexagon::STriwt:
+    assert((Offset % 4 == 0) && "Offset has incorrect alignment");
+    return (Offset >= Hexagon_MEMW_OFFSET_MIN) &&
+           (Offset <= Hexagon_MEMW_OFFSET_MAX);
+
+  case Hexagon::LDrid:
+  case Hexagon::STrid:
+    assert((Offset % 8 == 0) && "Offset has incorrect alignment");
+    return (Offset >= Hexagon_MEMD_OFFSET_MIN) &&
+           (Offset <= Hexagon_MEMD_OFFSET_MAX);
+
+  case Hexagon::LDrih:
+  case Hexagon::LDriuh:
+  case Hexagon::STrih:
+  case Hexagon::LDrih_ae:
+    assert((Offset % 2 == 0) && "Offset has incorrect alignment");
+    return (Offset >= Hexagon_MEMH_OFFSET_MIN) &&
+           (Offset <= Hexagon_MEMH_OFFSET_MAX);
+
+  case Hexagon::LDrib:
+  case Hexagon::STrib:
+  case Hexagon::LDriub:
+  case Hexagon::LDriubit:
+  case Hexagon::LDrib_ae:
+  case Hexagon::LDriub_ae:
+    return (Offset >= Hexagon_MEMB_OFFSET_MIN) &&
+           (Offset <= Hexagon_MEMB_OFFSET_MAX);
+
+  case Hexagon::ADD_ri:
+  case Hexagon::TFR_FI:
+    return (Offset >= Hexagon_ADDI_OFFSET_MIN) &&
+           (Offset <= Hexagon_ADDI_OFFSET_MAX);
+
+  case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 :
+  case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
+  case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
+  case Hexagon::MEMw_ADDr_indexed_MEM_V4 :
+  case Hexagon::MEMw_SUBr_indexed_MEM_V4 :
+  case Hexagon::MEMw_ANDr_indexed_MEM_V4 :
+  case Hexagon::MEMw_ORr_indexed_MEM_V4 :
+  case Hexagon::MEMw_ADDSUBi_MEM_V4 :
+  case Hexagon::MEMw_ADDi_MEM_V4 :
+  case Hexagon::MEMw_SUBi_MEM_V4 :
+  case Hexagon::MEMw_ADDr_MEM_V4 :
+  case Hexagon::MEMw_SUBr_MEM_V4 :
+  case Hexagon::MEMw_ANDr_MEM_V4 :
+  case Hexagon::MEMw_ORr_MEM_V4 :
+    assert ((Offset % 4) == 0 && "MEMOPw offset is not aligned correctly." );
+    return (0 <= Offset && Offset <= 255);
+
+  case Hexagon::MEMh_ADDSUBi_indexed_MEM_V4 :
+  case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
+  case Hexagon::MEMh_SUBi_indexed_MEM_V4 :
+  case Hexagon::MEMh_ADDr_indexed_MEM_V4 :
+  case Hexagon::MEMh_SUBr_indexed_MEM_V4 :
+  case Hexagon::MEMh_ANDr_indexed_MEM_V4 :
+  case Hexagon::MEMh_ORr_indexed_MEM_V4 :
+  case Hexagon::MEMh_ADDSUBi_MEM_V4 :
+  case Hexagon::MEMh_ADDi_MEM_V4 :
+  case Hexagon::MEMh_SUBi_MEM_V4 :
+  case Hexagon::MEMh_ADDr_MEM_V4 :
+  case Hexagon::MEMh_SUBr_MEM_V4 :
+  case Hexagon::MEMh_ANDr_MEM_V4 :
+  case Hexagon::MEMh_ORr_MEM_V4 :
+    assert ((Offset % 2) == 0 && "MEMOPh offset is not aligned correctly." );
+    return (0 <= Offset && Offset <= 127);
+
+  case Hexagon::MEMb_ADDSUBi_indexed_MEM_V4 :
+  case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
+  case Hexagon::MEMb_SUBi_indexed_MEM_V4 :
+  case Hexagon::MEMb_ADDr_indexed_MEM_V4 :
+  case Hexagon::MEMb_SUBr_indexed_MEM_V4 :
+  case Hexagon::MEMb_ANDr_indexed_MEM_V4 :
+  case Hexagon::MEMb_ORr_indexed_MEM_V4 :
+  case Hexagon::MEMb_ADDSUBi_MEM_V4 :
+  case Hexagon::MEMb_ADDi_MEM_V4 :
+  case Hexagon::MEMb_SUBi_MEM_V4 :
+  case Hexagon::MEMb_ADDr_MEM_V4 :
+  case Hexagon::MEMb_SUBr_MEM_V4 :
+  case Hexagon::MEMb_ANDr_MEM_V4 :
+  case Hexagon::MEMb_ORr_MEM_V4 :
+    return (0 <= Offset && Offset <= 63);
+
+  // LDri_pred and STriw_pred are pseudo operations, so it has to take offset of
+  // any size. Later pass knows how to handle it.
+  case Hexagon::STriw_pred:
+  case Hexagon::LDriw_pred:
+    return true;
+
+  // INLINEASM is very special.
+  case Hexagon::INLINEASM:
+    return true;
+  }
+  // Unknown opcode: still return a value when asserts are compiled out.
+  assert(0 && "No offset range is defined for this opcode. "
+              "Please define it in the above switch statement!");
+  return false;
+}
+
+
+// Check if the Offset is a valid auto-inc imm by Load/Store Type: it must lie
+// in the Hexagon_MEM*_AUTOINC range for VT and have its low bits clear
+// (3 bits for i64, 2 for i32, 1 for i16, none for i8).
+bool HexagonInstrInfo::
+isValidAutoIncImm(const EVT VT, const int Offset) const {
+  int Lowest, Highest, AlignMask;
+
+  if (VT == MVT::i64) {
+    Lowest = Hexagon_MEMD_AUTOINC_MIN;
+    Highest = Hexagon_MEMD_AUTOINC_MAX;
+    AlignMask = 0x7;
+  } else if (VT == MVT::i32) {
+    Lowest = Hexagon_MEMW_AUTOINC_MIN;
+    Highest = Hexagon_MEMW_AUTOINC_MAX;
+    AlignMask = 0x3;
+  } else if (VT == MVT::i16) {
+    Lowest = Hexagon_MEMH_AUTOINC_MIN;
+    Highest = Hexagon_MEMH_AUTOINC_MAX;
+    AlignMask = 0x1;
+  } else if (VT == MVT::i8) {
+    Lowest = Hexagon_MEMB_AUTOINC_MIN;
+    Highest = Hexagon_MEMB_AUTOINC_MAX;
+    AlignMask = 0x0;
+  } else {
+    assert(0 && "Not an auto-inc opc!");
+    return false;
+  }
+
+  return Offset >= Lowest && Offset <= Highest && (Offset & AlignMask) == 0;
+}
+
+
+// Reports whether MI is one of the MEMw_/MEMh_/MEMb_*_MEM_V4 opcodes.
+bool HexagonInstrInfo::isMemOp(const MachineInstr *MI) const {
+  switch (MI->getOpcode()) {
+  case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 :
+  case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
+  case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
+  case Hexagon::MEMw_ADDr_indexed_MEM_V4 :
+  case Hexagon::MEMw_SUBr_indexed_MEM_V4 :
+  case Hexagon::MEMw_ANDr_indexed_MEM_V4 :
+  case Hexagon::MEMw_ORr_indexed_MEM_V4 :
+  case Hexagon::MEMw_ADDSUBi_MEM_V4 :
+  case Hexagon::MEMw_ADDi_MEM_V4 :
+  case Hexagon::MEMw_SUBi_MEM_V4 :
+  case Hexagon::MEMw_ADDr_MEM_V4 :
+  case Hexagon::MEMw_SUBr_MEM_V4 :
+  case Hexagon::MEMw_ANDr_MEM_V4 :
+  case Hexagon::MEMw_ORr_MEM_V4 :
+  case Hexagon::MEMh_ADDSUBi_indexed_MEM_V4 :
+  case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
+  case Hexagon::MEMh_SUBi_indexed_MEM_V4 :
+  case Hexagon::MEMh_ADDr_indexed_MEM_V4 :
+  case Hexagon::MEMh_SUBr_indexed_MEM_V4 :
+  case Hexagon::MEMh_ANDr_indexed_MEM_V4 :
+  case Hexagon::MEMh_ORr_indexed_MEM_V4 :
+  case Hexagon::MEMh_ADDSUBi_MEM_V4 :
+  case Hexagon::MEMh_ADDi_MEM_V4 :
+  case Hexagon::MEMh_SUBi_MEM_V4 :
+  case Hexagon::MEMh_ADDr_MEM_V4 :
+  case Hexagon::MEMh_SUBr_MEM_V4 :
+  case Hexagon::MEMh_ANDr_MEM_V4 :
+  case Hexagon::MEMh_ORr_MEM_V4 :
+  case Hexagon::MEMb_ADDSUBi_indexed_MEM_V4 :
+  case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
+  case Hexagon::MEMb_SUBi_indexed_MEM_V4 :
+  case Hexagon::MEMb_ADDr_indexed_MEM_V4 :
+  case Hexagon::MEMb_SUBr_indexed_MEM_V4 :
+  case Hexagon::MEMb_ANDr_indexed_MEM_V4 :
+  case Hexagon::MEMb_ORr_indexed_MEM_V4 :
+  case Hexagon::MEMb_ADDSUBi_MEM_V4 :
+  case Hexagon::MEMb_ADDi_MEM_V4 :
+  case Hexagon::MEMb_SUBi_MEM_V4 :
+  case Hexagon::MEMb_ADDr_MEM_V4 :
+  case Hexagon::MEMb_SUBr_MEM_V4 :
+  case Hexagon::MEMb_ANDr_MEM_V4 :
+  case Hexagon::MEMb_ORr_MEM_V4 :
+    return true;
+  default:
+    return false;
+  }
+}
+
+
+// Reports whether MI is one of the pseudo opcodes STriw_pred / LDriw_pred.
+bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr *MI) const {
+  switch (MI->getOpcode()) {
+  case Hexagon::LDriw_pred:
+  case Hexagon::STriw_pred:
+    return true;
+  default:
+    return false;
+  }
+}
+
+
+// Reports whether MI is one of the conditional (cPt / cNotPt) ALU32 opcodes.
+// The *_V4 forms count only when the subtarget's arch version is V4.
+bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const {
+  switch (MI->getOpcode()) {
+  default:
+    return false;
+  case Hexagon::ADD_ri_cPt:
+  case Hexagon::ADD_ri_cNotPt:
+  case Hexagon::ADD_rr_cPt:
+  case Hexagon::ADD_rr_cNotPt:
+  case Hexagon::XOR_rr_cPt:
+  case Hexagon::XOR_rr_cNotPt:
+  case Hexagon::AND_rr_cPt:
+  case Hexagon::AND_rr_cNotPt:
+  case Hexagon::OR_rr_cPt:
+  case Hexagon::OR_rr_cNotPt:
+  case Hexagon::SUB_rr_cPt:
+  case Hexagon::SUB_rr_cNotPt:
+  case Hexagon::COMBINE_rr_cPt:
+  case Hexagon::COMBINE_rr_cNotPt:
+    return true;
+
+  case Hexagon::ASLH_cPt_V4:
+  case Hexagon::ASLH_cNotPt_V4:
+  case Hexagon::ASRH_cPt_V4:
+  case Hexagon::ASRH_cNotPt_V4:
+  case Hexagon::SXTB_cPt_V4:
+  case Hexagon::SXTB_cNotPt_V4:
+  case Hexagon::SXTH_cPt_V4:
+  case Hexagon::SXTH_cNotPt_V4:
+  case Hexagon::ZXTB_cPt_V4:
+  case Hexagon::ZXTB_cNotPt_V4:
+  case Hexagon::ZXTH_cPt_V4:
+  case Hexagon::ZXTH_cNotPt_V4:
+    return getRegisterInfo().Subtarget.getHexagonArchVersion() ==
+           HexagonSubtarget::V4;
+  }
+}
+
+
+// Reports whether MI is one of the conditional (cPt / cNotPt) load opcodes.
+// The base and indexed forms always count; the POST_* and *_V4 forms count
+// only when the subtarget's arch version is V4.
+bool HexagonInstrInfo::isConditionalLoad (const MachineInstr* MI) const {
+  switch (MI->getOpcode()) {
+  default:
+    return false;
+
+  case Hexagon::LDrid_cPt :
+  case Hexagon::LDrid_cNotPt :
+  case Hexagon::LDrid_indexed_cPt :
+  case Hexagon::LDrid_indexed_cNotPt :
+  case Hexagon::LDriw_cPt :
+  case Hexagon::LDriw_cNotPt :
+  case Hexagon::LDriw_indexed_cPt :
+  case Hexagon::LDriw_indexed_cNotPt :
+  case Hexagon::LDrih_cPt :
+  case Hexagon::LDrih_cNotPt :
+  case Hexagon::LDrih_indexed_cPt :
+  case Hexagon::LDrih_indexed_cNotPt :
+  case Hexagon::LDrib_cPt :
+  case Hexagon::LDrib_cNotPt :
+  case Hexagon::LDrib_indexed_cPt :
+  case Hexagon::LDrib_indexed_cNotPt :
+  case Hexagon::LDriuh_cPt :
+  case Hexagon::LDriuh_cNotPt :
+  case Hexagon::LDriuh_indexed_cPt :
+  case Hexagon::LDriuh_indexed_cNotPt :
+  case Hexagon::LDriub_cPt :
+  case Hexagon::LDriub_cNotPt :
+  case Hexagon::LDriub_indexed_cPt :
+  case Hexagon::LDriub_indexed_cNotPt :
+    return true;
+  case Hexagon::POST_LDrid_cPt :
+  case Hexagon::POST_LDrid_cNotPt :
+  case Hexagon::POST_LDriw_cPt :
+  case Hexagon::POST_LDriw_cNotPt :
+  case Hexagon::POST_LDrih_cPt :
+  case Hexagon::POST_LDrih_cNotPt :
+  case Hexagon::POST_LDrib_cPt :
+  case Hexagon::POST_LDrib_cNotPt :
+  case Hexagon::POST_LDriuh_cPt :
+  case Hexagon::POST_LDriuh_cNotPt :
+  case Hexagon::POST_LDriub_cPt :
+  case Hexagon::POST_LDriub_cNotPt :
+  case Hexagon::LDrid_indexed_cPt_V4 :
+  case Hexagon::LDrid_indexed_cNotPt_V4 :
+  case Hexagon::LDrid_indexed_shl_cPt_V4 :
+  case Hexagon::LDrid_indexed_shl_cNotPt_V4 :
+  case Hexagon::LDrib_indexed_cPt_V4 :
+  case Hexagon::LDrib_indexed_cNotPt_V4 :
+  case Hexagon::LDrib_indexed_shl_cPt_V4 :
+  case Hexagon::LDrib_indexed_shl_cNotPt_V4 :
+  case Hexagon::LDriub_indexed_cPt_V4 :
+  case Hexagon::LDriub_indexed_cNotPt_V4 :
+  case Hexagon::LDriub_indexed_shl_cPt_V4 :
+  case Hexagon::LDriub_indexed_shl_cNotPt_V4 :
+  case Hexagon::LDrih_indexed_cPt_V4 :
+  case Hexagon::LDrih_indexed_cNotPt_V4 :
+  case Hexagon::LDrih_indexed_shl_cPt_V4 :
+  case Hexagon::LDrih_indexed_shl_cNotPt_V4 :
+  case Hexagon::LDriuh_indexed_cPt_V4 :
+  case Hexagon::LDriuh_indexed_cNotPt_V4 :
+  case Hexagon::LDriuh_indexed_shl_cPt_V4 :
+  case Hexagon::LDriuh_indexed_shl_cNotPt_V4 :
+  case Hexagon::LDriw_indexed_cPt_V4 :
+  case Hexagon::LDriw_indexed_cNotPt_V4 :
+  case Hexagon::LDriw_indexed_shl_cPt_V4 :
+  case Hexagon::LDriw_indexed_shl_cNotPt_V4 :
+    return getRegisterInfo().Subtarget.getHexagonArchVersion() ==
+           HexagonSubtarget::V4;
+  }
+}
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
new file mode 100644
index 0000000..d549c46
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -0,0 +1,166 @@
+//=- HexagonInstrInfo.h - Hexagon Instruction Information ---------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonINSTRUCTIONINFO_H
+#define HexagonINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "HexagonRegisterInfo.h"
+
+
+#define GET_INSTRINFO_HEADER
+#include "HexagonGenInstrInfo.inc"
+
+namespace llvm {
+
+class HexagonInstrInfo : public HexagonGenInstrInfo {
+ const HexagonRegisterInfo RI;
+ const HexagonSubtarget& Subtarget;
+public:
+ explicit HexagonInstrInfo(HexagonSubtarget &ST);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of register info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const HexagonRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the stored stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ return 0;
+ }
+
+ unsigned createVR(MachineFunction* MF, MVT VT) const;
+
+ virtual bool isPredicable(MachineInstr *MI) const;
+ virtual bool
+ PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
+ unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const;
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles, unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles, unsigned ExtraFCycles,
+ const BranchProbability &Probability) const;
+
+ virtual bool isPredicated(const MachineInstr *MI) const;
+ virtual bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+ virtual bool
+ SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+ virtual bool
+ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ virtual bool
+ isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumCycles,
+ const BranchProbability &Probability) const;
+
+ bool isValidOffset(const int Opcode, const int Offset) const;
+ bool isValidAutoIncImm(const EVT VT, const int Offset) const;
+ bool isMemOp(const MachineInstr *MI) const;
+ bool isSpillPredRegOp(const MachineInstr *MI) const;
+ bool isU6_3Immediate(const int value) const;
+ bool isU6_2Immediate(const int value) const;
+ bool isU6_1Immediate(const int value) const;
+ bool isU6_0Immediate(const int value) const;
+ bool isS4_3Immediate(const int value) const;
+ bool isS4_2Immediate(const int value) const;
+ bool isS4_1Immediate(const int value) const;
+ bool isS4_0Immediate(const int value) const;
+ bool isS12_Immediate(const int value) const;
+ bool isU6_Immediate(const int value) const;
+ bool isS8_Immediate(const int value) const;
+ bool isS6_Immediate(const int value) const;
+
+ bool isConditionalALU32 (const MachineInstr* MI) const;
+ bool isConditionalLoad (const MachineInstr* MI) const;
+ bool isDeallocRet(const MachineInstr *MI) const;
+
+private:
+ int getMatchingCondBranchOpcode(int Opc, bool sense) const;
+
+};
+
+}
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
new file mode 100644
index 0000000..cc508b7
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -0,0 +1,3014 @@
+//==- HexagonInstrInfo.td - Target Description for Hexagon -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrFormats.td"
+include "HexagonImmediates.td"
+
+//===----------------------------------------------------------------------===//
+// Hexagon Instruction Predicate Definitions.
+//===----------------------------------------------------------------------===//
+def HasV2T : Predicate<"Subtarget.hasV2TOps()">;
+def HasV2TOnly : Predicate<"Subtarget.hasV2TOpsOnly()">;
+def NoV2T : Predicate<"!Subtarget.hasV2TOps()">;
+def HasV3T : Predicate<"Subtarget.hasV3TOps()">;
+def HasV3TOnly : Predicate<"Subtarget.hasV3TOpsOnly()">;
+def NoV3T : Predicate<"!Subtarget.hasV3TOps()">;
+def HasV4T : Predicate<"Subtarget.hasV4TOps()">;
+def NoV4T : Predicate<"!Subtarget.hasV4TOps()">;
+def UseMEMOP : Predicate<"Subtarget.useMemOps()">;
+
+// Addressing modes.
+def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
+def ADDRriS11_0 : ComplexPattern<i32, 2, "SelectADDRriS11_0", [frameindex], []>;
+def ADDRriS11_1 : ComplexPattern<i32, 2, "SelectADDRriS11_1", [frameindex], []>;
+def ADDRriS11_2 : ComplexPattern<i32, 2, "SelectADDRriS11_2", [frameindex], []>;
+def ADDRriS11_3 : ComplexPattern<i32, 2, "SelectADDRriS11_3", [frameindex], []>;
+def ADDRriU6_0 : ComplexPattern<i32, 2, "SelectADDRriU6_0", [frameindex], []>;
+def ADDRriU6_1 : ComplexPattern<i32, 2, "SelectADDRriU6_1", [frameindex], []>;
+def ADDRriU6_2 : ComplexPattern<i32, 2, "SelectADDRriU6_2", [frameindex], []>;
+
+// Address operand made of two IntRegs sub-operands.
+def MEMrr : Operand<i32> {
+ let PrintMethod = "printHexagonMEMrrOperand";
+ let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+
+// NOTE(review): also declared as (IntRegs, IntRegs); MEMri_s11_2 uses s11Imm.
+def MEMri : Operand<i32> {
+ let PrintMethod = "printHexagonMEMriOperand";
+ let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+
+def MEMri_s11_2 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectMEMriS11_2", []> {
+ let PrintMethod = "printHexagonMEMriOperand";
+ let MIOperandInfo = (ops IntRegs, s11Imm);
+}
+
+def FrameIndex : Operand<i32> {
+ let PrintMethod = "printHexagonFrameIndexOperand";
+ let MIOperandInfo = (ops IntRegs, s11Imm);
+}
+
+let PrintMethod = "printGlobalOperand" in
+ def globaladdress : Operand<i32>;
+
+let PrintMethod = "printJumpTable" in
+ def jumptablebase : Operand<i32>;
+
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+
+def bblabel : Operand<i32>;
+def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf , [], "BasicBlockSDNode">;
+
+def symbolHi32 : Operand<i32> {
+ let PrintMethod = "printSymbolHi";
+}
+def symbolLo32 : Operand<i32> {
+ let PrintMethod = "printSymbolLo";
+}
+
+// Multi-class for logical operators: rr = (reg, reg), ri = (s10 imm, reg).
+multiclass ALU32_rr_ri<string OpcStr, SDNode OpNode> {
+ def rr : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ def ri : ALU32_ri<(outs IntRegs:$dst), (ins s10Imm:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "(#$b, $c)")),
+ [(set IntRegs:$dst, (OpNode s10Imm:$b, IntRegs:$c))]>;
+}
+
+// Multi-class for compare ops.
+let isCompare = 1 in {
+multiclass CMP64_rr<string OpcStr, PatFrag OpNode> {
+ def rr : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$b, DoubleRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set PredRegs:$dst, (OpNode DoubleRegs:$b, DoubleRegs:$c))]>;
+}
+multiclass CMP32_rr<string OpcStr, PatFrag OpNode> {
+ def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+}
+
+multiclass CMP32_rr_ri_s10<string OpcStr, PatFrag OpNode> {
+ def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s10Imm:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, s10ImmPred:$c))]>;
+}
+
+multiclass CMP32_rr_ri_u9<string OpcStr, PatFrag OpNode> {
+ def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Imm:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, u9ImmPred:$c))]>;
+}
+
+multiclass CMP32_ri_u9<string OpcStr, PatFrag OpNode> {
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Imm:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, u9ImmPred:$c))]>;
+}
+
+multiclass CMP32_ri_s8<string OpcStr, PatFrag OpNode> {
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s8Imm:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, s8ImmPred:$c))]>;
+}
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// http://qualnet.qualcomm.com/~erich/v1/htmldocs/index.html
+// http://qualnet.qualcomm.com/~erich/v2/htmldocs/index.html
+// http://qualnet.qualcomm.com/~erich/v3/htmldocs/index.html
+// http://qualnet.qualcomm.com/~erich/v4/htmldocs/index.html
+// http://qualnet.qualcomm.com/~erich/v5/htmldocs/index.html
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU32/ALU +
+//===----------------------------------------------------------------------===//
+// Add.
+let isPredicable = 1 in
+def ADD_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = add($src1, $src2)",
+ [(set IntRegs:$dst, (add IntRegs:$src1, IntRegs:$src2))]>;
+
+let isPredicable = 1 in
+def ADD_ri : ALU32_ri<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s16Imm:$src2),
+ "$dst = add($src1, #$src2)",
+ [(set IntRegs:$dst, (add IntRegs:$src1, s16ImmPred:$src2))]>;
+
+// Logical operations.
+let isPredicable = 1 in
+def XOR_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = xor($src1, $src2)",
+ [(set IntRegs:$dst, (xor IntRegs:$src1, IntRegs:$src2))]>;
+
+let isPredicable = 1 in
+def AND_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = and($src1, $src2)",
+ [(set IntRegs:$dst, (and IntRegs:$src1, IntRegs:$src2))]>;
+
+def OR_ri : ALU32_ri<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s8Imm:$src2),
+ "$dst = or($src1, #$src2)",
+ [(set IntRegs:$dst, (or IntRegs:$src1, s8ImmPred:$src2))]>;
+
+def NOT_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1),
+ "$dst = not($src1)",
+ [(set IntRegs:$dst, (not IntRegs:$src1))]>;
+
+def AND_ri : ALU32_ri<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s10Imm:$src2),
+ "$dst = and($src1, #$src2)",
+ [(set IntRegs:$dst, (and IntRegs:$src1, s10ImmPred:$src2))]>;
+
+let isPredicable = 1 in
+def OR_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = or($src1, $src2)",
+ [(set IntRegs:$dst, (or IntRegs:$src1, IntRegs:$src2))]>;
+
+// Negate.
+def NEG : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = neg($src1)",
+ [(set IntRegs:$dst, (ineg IntRegs:$src1))]>;
+// Nop.
+let neverHasSideEffects = 1 in
+def NOP : ALU32_rr<(outs), (ins),
+ "nop",
+ []>;
+
+// Subtract.
+let isPredicable = 1 in
+def SUB_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = sub($src1, $src2)",
+ [(set IntRegs:$dst, (sub IntRegs:$src1, IntRegs:$src2))]>;
+
+// Transfer immediate.
+let isReMaterializable = 1, isPredicable = 1 in
+def TFRI : ALU32_ri<(outs IntRegs:$dst), (ins s16Imm:$src1),
+ "$dst = #$src1",
+ [(set IntRegs:$dst, s16ImmPred:$src1)]>;
+
+// Transfer register.
+let neverHasSideEffects = 1, isPredicable = 1 in
+def TFR : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = $src1",
+ []>;
+
+// Transfer control register.
+let neverHasSideEffects = 1 in
+def TFCR : CRInst<(outs CRRegs:$dst), (ins IntRegs:$src1),
+ "$dst = $src1",
+ []>;
+//===----------------------------------------------------------------------===//
+// ALU32/ALU -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// ALU32/PERM +
+//===----------------------------------------------------------------------===//
+
+// Combine.
+let isPredicable = 1, neverHasSideEffects = 1 in
+def COMBINE_rr : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = combine($src1, $src2)",
+ []>;
+
+// Mux.
+def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
+ DoubleRegs:$src2,
+ DoubleRegs:$src3),
+ "$dst = vmux($src1, $src2, $src3)",
+ []>;
+
+def MUX_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ "$dst = mux($src1, $src2, $src3)",
+ [(set IntRegs:$dst, (select PredRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+def MUX_ir : ALU32_ir<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Imm:$src2,
+ IntRegs:$src3),
+ "$dst = mux($src1, #$src2, $src3)",
+ [(set IntRegs:$dst, (select PredRegs:$src1,
+ s8ImmPred:$src2, IntRegs:$src3))]>;
+
+def MUX_ri : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2,
+ s8Imm:$src3),
+ "$dst = mux($src1, $src2, #$src3)",
+ [(set IntRegs:$dst, (select PredRegs:$src1, IntRegs:$src2,
+ s8ImmPred:$src3))]>;
+
+def MUX_ii : ALU32_ii<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Imm:$src2,
+ s8Imm:$src3),
+ "$dst = mux($src1, #$src2, #$src3)",
+ [(set IntRegs:$dst, (select PredRegs:$src1, s8ImmPred:$src2,
+ s8ImmPred:$src3))]>;
+
+// Shift halfword: $src1 is the value shifted, the constant 16 is the amount.
+let isPredicable = 1 in
+def ASLH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+           "$dst = aslh($src1)",
+           [(set IntRegs:$dst, (shl IntRegs:$src1, (i32 16)))]>;
+
+let isPredicable = 1 in
+def ASRH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+           "$dst = asrh($src1)",
+           [(set IntRegs:$dst, (sra IntRegs:$src1, (i32 16)))]>;
+
+// Sign extend.
+let isPredicable = 1 in
+def SXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = sxtb($src1)",
+ [(set IntRegs:$dst, (sext_inreg IntRegs:$src1, i8))]>;
+
+let isPredicable = 1 in
+def SXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = sxth($src1)",
+ [(set IntRegs:$dst, (sext_inreg IntRegs:$src1, i16))]>;
+
+// Zero extend. No selection pattern is attached to these two defs.
+let isPredicable = 1, neverHasSideEffects = 1 in
+def ZXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = zxtb($src1)",
+ []>;
+
+let isPredicable = 1, neverHasSideEffects = 1 in
+def ZXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = zxth($src1)",
+ []>;
+//===----------------------------------------------------------------------===//
+// ALU32/PERM -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// ALU32/PRED +
+//===----------------------------------------------------------------------===//
+
+// Conditional add.
+// Suffix scheme: cPt / cNotPt execute when predicate $src1 is true / false;
+// the cdn variants print the predicate as "$src1.new".  None of the forms has
+// a selection pattern, hence the explicit neverHasSideEffects.
+let neverHasSideEffects = 1 in {
+  // Register + s16Imm immediate.
+  def ADD_ri_cPt : ALU32_ri<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3),
+      "if ($src1) $dst = add($src2, #$src3)",
+      []>;
+
+  def ADD_ri_cNotPt : ALU32_ri<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3),
+      "if (!$src1) $dst = add($src2, #$src3)",
+      []>;
+
+  def ADD_ri_cdnPt : ALU32_ri<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3),
+      "if ($src1.new) $dst = add($src2, #$src3)",
+      []>;
+
+  def ADD_ri_cdnNotPt : ALU32_ri<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3),
+      "if (!$src1.new) $dst = add($src2, #$src3)",
+      []>;
+
+  // Register + register.
+  def ADD_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "if ($src1) $dst = add($src2, $src3)",
+      []>;
+
+  def ADD_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "if (!$src1) $dst = add($src2, $src3)",
+      []>;
+
+  def ADD_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "if ($src1.new) $dst = add($src2, $src3)",
+      []>;
+
+  def ADD_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "if (!$src1.new) $dst = add($src2, $src3)",
+      []>;
+}
+
+
+// Conditional combine.
+// Predicated forms of combine: two IntRegs sources produce one DoubleRegs
+// result.  Pattern-less, so side-effect freedom is stated explicitly.
+let neverHasSideEffects = 1 in {
+  def COMBINE_rr_cPt : ALU32_rr<(outs DoubleRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "if ($src1) $dst = combine($src2, $src3)",
+      []>;
+
+  def COMBINE_rr_cNotPt : ALU32_rr<(outs DoubleRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "if (!$src1) $dst = combine($src2, $src3)",
+      []>;
+
+  def COMBINE_rr_cdnPt : ALU32_rr<(outs DoubleRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "if ($src1.new) $dst = combine($src2, $src3)",
+      []>;
+
+  def COMBINE_rr_cdnNotPt : ALU32_rr<(outs DoubleRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "if (!$src1.new) $dst = combine($src2, $src3)",
+      []>;
+}
+
+// Conditional logical operations.
+// Predicated xor / and / or on two registers.  Suffixes: cPt / cNotPt execute
+// when predicate $src1 is true / false; the cdn variants print the predicate
+// as "$src1.new".
+//
+// These records have no selection pattern, so TableGen cannot infer their
+// properties and would conservatively treat them as having unmodeled side
+// effects.  They are marked neverHasSideEffects explicitly, matching the
+// conditional add, combine and transfer definitions in this file.
+let neverHasSideEffects = 1 in {
+  def XOR_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "if ($src1) $dst = xor($src2, $src3)",
+      []>;
+
+  def XOR_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "if (!$src1) $dst = xor($src2, $src3)",
+      []>;
+
+  def XOR_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "if ($src1.new) $dst = xor($src2, $src3)",
+      []>;
+
+  def XOR_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "if (!$src1.new) $dst = xor($src2, $src3)",
+      []>;
+
+  def AND_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "if ($src1) $dst = and($src2, $src3)",
+      []>;
+
+  def AND_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "if (!$src1) $dst = and($src2, $src3)",
+      []>;
+
+  def AND_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "if ($src1.new) $dst = and($src2, $src3)",
+      []>;
+
+  def AND_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "if (!$src1.new) $dst = and($src2, $src3)",
+      []>;
+
+  def OR_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "if ($src1) $dst = or($src2, $src3)",
+      []>;
+
+  def OR_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "if (!$src1) $dst = or($src2, $src3)",
+      []>;
+
+  def OR_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "if ($src1.new) $dst = or($src2, $src3)",
+      []>;
+
+  def OR_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "if (!$src1.new) $dst = or($src2, $src3)",
+      []>;
+}
+
+
+// Conditional subtract.
+// Predicated register-register subtract.  The records have no selection
+// pattern, so without an explicit flag TableGen would conservatively assume
+// unmodeled side effects; they are marked neverHasSideEffects like the
+// conditional add, combine and transfer definitions in this file.
+let neverHasSideEffects = 1 in {
+  def SUB_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "if ($src1) $dst = sub($src2, $src3)",
+      []>;
+
+  def SUB_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "if (!$src1) $dst = sub($src2, $src3)",
+      []>;
+
+  def SUB_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "if ($src1.new) $dst = sub($src2, $src3)",
+      []>;
+
+  def SUB_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "if (!$src1.new) $dst = sub($src2, $src3)",
+      []>;
+}
+
+
+// Conditional transfer.
+// TFR_* copy a register, TFRI_* materialise an s12Imm immediate; each exists
+// for predicate true / false (cPt / cNotPt) and for the "$src1.new" predicate
+// spelling (cdnPt / cdnNotPt).  All are pattern-less.
+let neverHasSideEffects = 1 in {
+  def TFR_cPt : ALU32_rr<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2),
+      "if ($src1) $dst = $src2",
+      []>;
+
+  def TFR_cNotPt : ALU32_rr<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2),
+      "if (!$src1) $dst = $src2",
+      []>;
+
+  def TFRI_cPt : ALU32_ri<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, s12Imm:$src2),
+      "if ($src1) $dst = #$src2",
+      []>;
+
+  def TFRI_cNotPt : ALU32_ri<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, s12Imm:$src2),
+      "if (!$src1) $dst = #$src2",
+      []>;
+
+  def TFR_cdnPt : ALU32_rr<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2),
+      "if ($src1.new) $dst = $src2",
+      []>;
+
+  def TFR_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2),
+      "if (!$src1.new) $dst = $src2",
+      []>;
+
+  def TFRI_cdnPt : ALU32_ri<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, s12Imm:$src2),
+      "if ($src1.new) $dst = #$src2",
+      []>;
+
+  def TFRI_cdnNotPt : ALU32_ri<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, s12Imm:$src2),
+      "if (!$src1.new) $dst = #$src2",
+      []>;
+}
+
+// Compare.
+// Each defm instantiates a CMP32_* multiclass with an assembly mnemonic and
+// the condition-code node to select from.  The multiclass names (rr / ri,
+// s8 / s10 / u9) presumably describe the operand forms and immediate width
+// generated -- confirm against the multiclass definitions.
+defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", setugt>;
+defm CMPGT : CMP32_rr_ri_s10<"cmp.gt", setgt>;
+defm CMPLT : CMP32_rr<"cmp.lt", setlt>;
+defm CMPEQ : CMP32_rr_ri_s10<"cmp.eq", seteq>;
+defm CMPGE : CMP32_ri_s8<"cmp.ge", setge>;
+defm CMPGEU : CMP32_ri_u9<"cmp.geu", setuge>;
+//===----------------------------------------------------------------------===//
+// ALU32/PRED -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU32/VH +
+//===----------------------------------------------------------------------===//
+// Vector add halfwords
+
+// Vector average halfwords
+
+// Vector subtract halfwords
+//===----------------------------------------------------------------------===//
+// ALU32/VH -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// ALU64/ALU +
+//===----------------------------------------------------------------------===//
+// Add.
+// 64-bit add on register pairs, selected from the generic add node on
+// DoubleRegs operands.
+def ADD64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = add($src1, $src2)",
+ [(set DoubleRegs:$dst, (add DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+// Add halfword.
+
+// Compare.
+// 64-bit compares, instantiated from the CMP64_rr multiclass with a mnemonic
+// and a condition-code node.
+// NOTE(review): "CMPEHexagon4" does not follow the CMPGT64 / CMPGTU64 naming
+// of its siblings and looks like a mangled "CMPEQ64".  The name is left as is
+// because the generated record names may be referenced from other files --
+// confirm before renaming.
+defm CMPEHexagon4 : CMP64_rr<"cmp.eq", seteq>;
+defm CMPGT64 : CMP64_rr<"cmp.gt", setgt>;
+defm CMPGTU64 : CMP64_rr<"cmp.gtu", setugt>;
+
+// Logical operations.
+// 64-bit and / or / xor on register pairs, each selected from the generic
+// node of the same name on DoubleRegs operands.
+def AND_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = and($src1, $src2)",
+ [(set DoubleRegs:$dst, (and DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+def OR_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = or($src1, $src2)",
+ [(set DoubleRegs:$dst, (or DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+def XOR_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = xor($src1, $src2)",
+ [(set DoubleRegs:$dst, (xor DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+// Maximum.
+// Matches "src2 < src1 ? src1 : src2", i.e. the larger of the two signed
+// values.  Note the assembly string lists the operands as ($src2, $src1).
+def MAXw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = max($src2, $src1)",
+ [(set IntRegs:$dst, (select (i1 (setlt IntRegs:$src2,
+ IntRegs:$src1)),
+ IntRegs:$src1, IntRegs:$src2))]>;
+
+// Minimum.
+// Matches "src2 > src1 ? src1 : src2", i.e. the smaller of the two signed
+// values.  Note the assembly string lists the operands as ($src2, $src1).
+def MINw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = min($src2, $src1)",
+ [(set IntRegs:$dst, (select (i1 (setgt IntRegs:$src2,
+ IntRegs:$src1)),
+ IntRegs:$src1, IntRegs:$src2))]>;
+
+// Subtract.
+// 64-bit subtract on register pairs ($src1 - $src2), selected from the
+// generic sub node on DoubleRegs operands.
+def SUB64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = sub($src1, $src2)",
+ [(set DoubleRegs:$dst, (sub DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+// Subtract halfword.
+
+// Transfer register.
+// Register-pair to register-pair copy.  It has no selection pattern, so it is
+// explicitly marked as having no side effects.
+let neverHasSideEffects = 1 in
+def TFR_64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
+ "$dst = $src1",
+ []>;
+//===----------------------------------------------------------------------===//
+// ALU64/ALU -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/BIT +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/BIT -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/PERM +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/PERM -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/VB +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/VB -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/VH +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/VH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/VW +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/VW -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// CR +
+//===----------------------------------------------------------------------===//
+// Logical reductions on predicates.
+
+// Looping instructions.
+
+// Pipelined looping instructions.
+
+// Logical operations on predicates.
+// Predicate-register and, selected from the generic and node.
+def AND_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2),
+ "$dst = and($src1, $src2)",
+ [(set PredRegs:$dst, (and PredRegs:$src1, PredRegs:$src2))]>;
+
+// and with the second operand printed negated ("!$src2"); no selection
+// pattern.
+let neverHasSideEffects = 1 in
+def AND_pnotp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1,
+ PredRegs:$src2),
+ "$dst = and($src1, !$src2)",
+ []>;
+
+// Predicate-register not, selected from the generic not node.
+def NOT_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1),
+ "$dst = not($src1)",
+ [(set PredRegs:$dst, (not PredRegs:$src1))]>;
+
+// any8 / all8 on a predicate register.  Neither has a selection pattern nor a
+// neverHasSideEffects flag.
+def ANY_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1),
+ "$dst = any8($src1)",
+ []>;
+
+def ALL_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1),
+ "$dst = all8($src1)",
+ []>;
+
+// vitpack: two predicate registers in, one integer register out.  No
+// selection pattern.
+def VITPACK_pp : SInst<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ PredRegs:$src2),
+ "$dst = vitpack($src1, $src2)",
+ []>;
+
+// valignb / vspliceb: two register pairs plus a predicate register operand.
+// No selection patterns.
+def VALIGN_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2,
+ PredRegs:$src3),
+ "$dst = valignb($src1, $src2, $src3)",
+ []>;
+
+def VSPLICE_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2,
+ PredRegs:$src3),
+ "$dst = vspliceb($src1, $src2, $src3)",
+ []>;
+
+// mask: predicate register in, register pair out.  No selection pattern.
+def MASK_p : SInst<(outs DoubleRegs:$dst), (ins PredRegs:$src1),
+ "$dst = mask($src1)",
+ []>;
+
+// NOTE(review): same operands, assembly string and pattern as NOT_pp, so this
+// looks like a duplicate record -- confirm whether both are needed.
+def NOT_Ps : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1),
+ "$dst = not($src1)",
+ [(set PredRegs:$dst, (not PredRegs:$src1))]>;
+
+// Predicate-register or / xor, selected from the generic nodes.
+def OR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2),
+ "$dst = or($src1, $src2)",
+ [(set PredRegs:$dst, (or PredRegs:$src1, PredRegs:$src2))]>;
+
+def XOR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2),
+ "$dst = xor($src1, $src2)",
+ [(set PredRegs:$dst, (xor PredRegs:$src1, PredRegs:$src2))]>;
+
+
+// User control register transfer.
+//===----------------------------------------------------------------------===//
+// CR -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// J +
+//===----------------------------------------------------------------------===//
+// Jump to address.
+// Unconditional branch, selected from the generic br node.  isBarrier marks
+// that control does not fall through to the next instruction.
+// NOTE(review): unlike the conditional jumps below, this record does not list
+// Defs = [PC] -- confirm whether that is intentional.
+let isBranch = 1, isTerminator=1, isBarrier = 1, isPredicable = 1 in {
+ def JMP : JInst< (outs),
+ (ins brtarget:$offset),
+ "jump $offset",
+ [(br bb:$offset)]>;
+}
+
+// if (p0) jump
+// Conditional branch taken when the predicate is true; selected from brcond.
+let isBranch = 1, isTerminator=1, Defs = [PC] in {
+ def JMP_Pred : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if ($src) jump $offset",
+ [(brcond PredRegs:$src, bb:$offset)]>;
+}
+
+// if (!p0) jump
+// Conditional branch taken when the predicate is false; no selection pattern.
+let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in {
+ def JMP_PredNot : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if (!$src) jump $offset",
+ []>;
+}
+
+// Prints the same assembly as JMP_Pred but carries no selection pattern and
+// uses different operand names ($pred, $dst).
+let isTerminator = 1, isBranch = 1, neverHasSideEffects = 1, Defs = [PC] in {
+ def BRCOND : JInst < (outs), (ins PredRegs:$pred, brtarget:$dst),
+ "if ($pred) jump $dst",
+ []>;
+}
+
+// Jump to address conditioned on new predicate.
+// All four forms print the predicate as "$src.new" and append a ":t" or ":nt"
+// hint to the jump mnemonic.  None has a selection pattern.
+let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in {
+  // if (p0.new) jump:t
+  def JMP_PredPt : JInst<(outs),
+                         (ins PredRegs:$src, brtarget:$offset),
+                         "if ($src.new) jump:t $offset",
+                         []>;
+
+  // if (!p0.new) jump:t
+  def JMP_PredNotPt : JInst<(outs),
+                            (ins PredRegs:$src, brtarget:$offset),
+                            "if (!$src.new) jump:t $offset",
+                            []>;
+
+  // if (p0.new) jump:nt
+  def JMP_PredPnt : JInst<(outs),
+                          (ins PredRegs:$src, brtarget:$offset),
+                          "if ($src.new) jump:nt $offset",
+                          []>;
+
+  // if (!p0.new) jump:nt
+  def JMP_PredNotPnt : JInst<(outs),
+                             (ins PredRegs:$src, brtarget:$offset),
+                             "if (!$src.new) jump:nt $offset",
+                             []>;
+}
+//===----------------------------------------------------------------------===//
+// J -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// JR +
+//===----------------------------------------------------------------------===//
+// Selection DAG node for HexagonISD::RET_FLAG: no typed operands or results
+// (SDTNone), chained, with an optional incoming glue operand.
+def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+// Jump to address from register.
+// Function return: always prints "jumpr r31" and is selected from retflag.
+// R31 is listed as an implicit use because it is not an explicit operand.
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR: JRInst<(outs), (ins),
+ "jumpr r31",
+ [(retflag)]>;
+}
+
+// Jump to address from register.
+// Return through r31 when predicate $src1 is true.  No selection pattern.
+// NOTE(review): isBarrier = 1 states that control cannot fall through, yet a
+// predicated return continues with the next instruction when the predicate is
+// false -- confirm that the flag is intended on the conditional forms.
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cPt: JRInst<(outs), (ins PredRegs:$src1),
+ "if ($src1) jumpr r31",
+ []>;
+}
+
+// Jump to address from register.
+// Return through r31 when predicate $src1 is false.  No selection pattern;
+// the isBarrier question raised for JMPR_cPt applies here too.
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cNotPt: JRInst<(outs), (ins PredRegs:$src1),
+ "if (!$src1) jumpr r31",
+ []>;
+}
+
+//===----------------------------------------------------------------------===//
+// JR -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// LD +
+//===----------------------------------------------------------------------===//
+///
+/// Make sure that in post increment load, the first operand is always the post
+/// increment operand.
+///
+// Load doubleword.
+// Base form: MEMri address operand, selected from a plain load through the
+// ADDRriS11_3 addressing pattern.
+let isPredicable = 1 in
+def LDrid : LDInst<(outs DoubleRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memd($addr)",
+ [(set DoubleRegs:$dst, (load ADDRriS11_3:$addr))]>;
+
+// Explicit register + s11_3Imm offset form.  AddedComplexity raises its
+// priority during instruction selection.
+let isPredicable = 1, AddedComplexity = 20 in
+def LDrid_indexed : LDInst<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, s11_3Imm:$offset),
+ "$dst=memd($src1+#$offset)",
+ [(set DoubleRegs:$dst, (load (add IntRegs:$src1,
+ s11_3ImmPred:$offset)))]>;
+
+// Global-address forms (with and without an offset).  They have no selection
+// pattern, so mayLoad is stated explicitly.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_GP : LDInst<(outs DoubleRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memd(#$global+$offset)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDd_GP : LDInst<(outs DoubleRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memd(#$global)",
+ []>;
+
+// Post-increment form: $dst2 is the updated base register and is tied to
+// $src1 by the constraint string.
+// NOTE(review): the offset operand is s4Imm here, while the conditional
+// post-increment doubleword loads use s4_3Imm -- confirm which is intended.
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrid : LDInstPI<(outs DoubleRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memd($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load doubleword conditionally.
+// Predicated memd in three addressing shapes: MEMri operand, explicit
+// register + u6_3Imm offset ("indexed"), and post-increment with an s4_3Imm
+// step.  cPt / cNotPt test the predicate as true / false; the cdn variants
+// print it as "$src1.new".  None has a selection pattern.
+let mayLoad = 1, neverHasSideEffects = 1 in {
+  def LDrid_cPt : LDInst<(outs DoubleRegs:$dst),
+      (ins PredRegs:$src1, MEMri:$addr),
+      "if ($src1) $dst = memd($addr)",
+      []>;
+
+  def LDrid_cNotPt : LDInst<(outs DoubleRegs:$dst),
+      (ins PredRegs:$src1, MEMri:$addr),
+      "if (!$src1) $dst = memd($addr)",
+      []>;
+
+  def LDrid_indexed_cPt : LDInst<(outs DoubleRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
+      "if ($src1) $dst=memd($src2+#$src3)",
+      []>;
+
+  def LDrid_indexed_cNotPt : LDInst<(outs DoubleRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
+      "if (!$src1) $dst=memd($src2+#$src3)",
+      []>;
+
+  // Post-increment: $dst2 is the updated base, tied to $src2.
+  let hasCtrlDep = 1 in {
+    def POST_LDrid_cPt : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+        (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
+        "if ($src1) $dst1 = memd($src2++#$src3)",
+        [],
+        "$src2 = $dst2">;
+
+    def POST_LDrid_cNotPt : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+        (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
+        "if (!$src1) $dst1 = memd($src2++#$src3)",
+        [],
+        "$src2 = $dst2">;
+  }
+
+  def LDrid_cdnPt : LDInst<(outs DoubleRegs:$dst),
+      (ins PredRegs:$src1, MEMri:$addr),
+      "if ($src1.new) $dst = memd($addr)",
+      []>;
+
+  def LDrid_cdnNotPt : LDInst<(outs DoubleRegs:$dst),
+      (ins PredRegs:$src1, MEMri:$addr),
+      "if (!$src1.new) $dst = memd($addr)",
+      []>;
+
+  def LDrid_indexed_cdnPt : LDInst<(outs DoubleRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
+      "if ($src1.new) $dst=memd($src2+#$src3)",
+      []>;
+
+  def LDrid_indexed_cdnNotPt : LDInst<(outs DoubleRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
+      "if (!$src1.new) $dst=memd($src2+#$src3)",
+      []>;
+}
+
+
+// Load byte.
+// Sign-extending byte load through the ADDRriS11_0 addressing pattern.
+let isPredicable = 1 in
+def LDrib : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memb($addr)",
+ [(set IntRegs:$dst, (sextloadi8 ADDRriS11_0:$addr))]>;
+
+// Any-extending byte load; prints the same memb assembly as LDrib.
+def LDrib_ae : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memb($addr)",
+ [(set IntRegs:$dst, (extloadi8 ADDRriS11_0:$addr))]>;
+
+// Indexed load byte.
+let isPredicable = 1, AddedComplexity = 20 in
+def LDrib_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_0Imm:$offset),
+ "$dst=memb($src1+#$offset)",
+ [(set IntRegs:$dst, (sextloadi8 (add IntRegs:$src1,
+ s11_0ImmPred:$offset)))]>;
+
+
+// Indexed load byte any-extend.
+let AddedComplexity = 20 in
+def LDrib_ae_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_0Imm:$offset),
+ "$dst=memb($src1+#$offset)",
+ [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1,
+ s11_0ImmPred:$offset)))]>;
+
+// Global-address forms.  They have no selection pattern, so mayLoad is stated
+// explicitly.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memb(#$global+$offset)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDb_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memb(#$global)",
+ []>;
+
+// Same as LDb_GP but prints the memub mnemonic.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDub_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memub(#$global)",
+ []>;
+
+// Post-increment form: $dst2 is the updated base register, tied to $src1 by
+// the constraint string.
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrib : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memb($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load byte conditionally.
+// Predicated memb in three addressing shapes: MEMri operand, explicit
+// register + u6_0Imm offset ("indexed"), and post-increment with an s4_0Imm
+// step.  cPt / cNotPt test the predicate as true / false; the cdn variants
+// print it as "$src1.new".  None has a selection pattern.
+let mayLoad = 1, neverHasSideEffects = 1 in {
+  def LDrib_cPt : LDInst<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, MEMri:$addr),
+      "if ($src1) $dst = memb($addr)",
+      []>;
+
+  def LDrib_cNotPt : LDInst<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, MEMri:$addr),
+      "if (!$src1) $dst = memb($addr)",
+      []>;
+
+  def LDrib_indexed_cPt : LDInst<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+      "if ($src1) $dst = memb($src2+#$src3)",
+      []>;
+
+  def LDrib_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+      "if (!$src1) $dst = memb($src2+#$src3)",
+      []>;
+
+  // Post-increment: $dst2 is the updated base, tied to $src2.
+  let hasCtrlDep = 1 in {
+    def POST_LDrib_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+        (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+        "if ($src1) $dst1 = memb($src2++#$src3)",
+        [],
+        "$src2 = $dst2">;
+
+    def POST_LDrib_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+        (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+        "if (!$src1) $dst1 = memb($src2++#$src3)",
+        [],
+        "$src2 = $dst2">;
+  }
+
+  def LDrib_cdnPt : LDInst<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, MEMri:$addr),
+      "if ($src1.new) $dst = memb($addr)",
+      []>;
+
+  def LDrib_cdnNotPt : LDInst<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, MEMri:$addr),
+      "if (!$src1.new) $dst = memb($addr)",
+      []>;
+
+  def LDrib_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+      "if ($src1.new) $dst = memb($src2+#$src3)",
+      []>;
+
+  def LDrib_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+      "if (!$src1.new) $dst = memb($src2+#$src3)",
+      []>;
+}
+
+
+// Load halfword.
+// Sign-extending halfword load through the ADDRriS11_1 addressing pattern.
+let isPredicable = 1 in
+def LDrih : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memh($addr)",
+ [(set IntRegs:$dst, (sextloadi16 ADDRriS11_1:$addr))]>;
+
+// Explicit register + s11_1Imm offset form, given selection priority through
+// AddedComplexity.
+let isPredicable = 1, AddedComplexity = 20 in
+def LDrih_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_1Imm:$offset),
+ "$dst=memh($src1+#$offset)",
+ [(set IntRegs:$dst, (sextloadi16 (add IntRegs:$src1,
+ s11_1ImmPred:$offset)))] >;
+
+// Any-extending halfword load; prints the same memh assembly as LDrih.
+def LDrih_ae : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memh($addr)",
+ [(set IntRegs:$dst, (extloadi16 ADDRriS11_1:$addr))]>;
+
+// Indexed any-extending halfword load.
+let AddedComplexity = 20 in
+def LDrih_ae_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_1Imm:$offset),
+ "$dst=memh($src1+#$offset)",
+ [(set IntRegs:$dst, (extloadi16 (add IntRegs:$src1,
+ s11_1ImmPred:$offset)))] >;
+
+// Global-address forms.  They have no selection pattern, so mayLoad is stated
+// explicitly.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memh(#$global+$offset)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDh_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memh(#$global)",
+ []>;
+
+// Same as LDh_GP but prints the memuh mnemonic.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDuh_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memuh(#$global)",
+ []>;
+
+
+// Post-increment form: $dst2 is the updated base register, tied to $src1 by
+// the constraint string.
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrih : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memh($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load halfword conditionally.
+// Predicated memh in three addressing shapes: MEMri operand, explicit
+// register + u6_1Imm offset ("indexed"), and post-increment with an s4_1Imm
+// step.  cPt / cNotPt test the predicate as true / false; the cdn variants
+// print it as "$src1.new".  None has a selection pattern.
+let mayLoad = 1, neverHasSideEffects = 1 in {
+  def LDrih_cPt : LDInst<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, MEMri:$addr),
+      "if ($src1) $dst = memh($addr)",
+      []>;
+
+  def LDrih_cNotPt : LDInst<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, MEMri:$addr),
+      "if (!$src1) $dst = memh($addr)",
+      []>;
+
+  def LDrih_indexed_cPt : LDInst<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+      "if ($src1) $dst = memh($src2+#$src3)",
+      []>;
+
+  def LDrih_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+      "if (!$src1) $dst = memh($src2+#$src3)",
+      []>;
+
+  // Post-increment: $dst2 is the updated base, tied to $src2.
+  let hasCtrlDep = 1 in {
+    def POST_LDrih_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+        (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+        "if ($src1) $dst1 = memh($src2++#$src3)",
+        [],
+        "$src2 = $dst2">;
+
+    def POST_LDrih_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+        (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+        "if (!$src1) $dst1 = memh($src2++#$src3)",
+        [],
+        "$src2 = $dst2">;
+  }
+
+  def LDrih_cdnPt : LDInst<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, MEMri:$addr),
+      "if ($src1.new) $dst = memh($addr)",
+      []>;
+
+  def LDrih_cdnNotPt : LDInst<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, MEMri:$addr),
+      "if (!$src1.new) $dst = memh($addr)",
+      []>;
+
+  def LDrih_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+      "if ($src1.new) $dst = memh($src2+#$src3)",
+      []>;
+
+  def LDrih_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+      "if (!$src1.new) $dst = memh($src2+#$src3)",
+      []>;
+}
+
+// Load unsigned byte.
+// Zero-extending byte load through the ADDRriS11_0 addressing pattern.
+let isPredicable = 1 in
+def LDriub : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memub($addr)",
+ [(set IntRegs:$dst, (zextloadi8 ADDRriS11_0:$addr))]>;
+
+// Zero-extending i1 load; prints the same memub assembly as LDriub.
+let isPredicable = 1 in
+def LDriubit : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memub($addr)",
+ [(set IntRegs:$dst, (zextloadi1 ADDRriS11_0:$addr))]>;
+
+// Explicit register + s11_0Imm offset forms of the two loads above, given
+// selection priority through AddedComplexity.
+let isPredicable = 1, AddedComplexity = 20 in
+def LDriub_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_0Imm:$offset),
+ "$dst=memub($src1+#$offset)",
+ [(set IntRegs:$dst, (zextloadi8 (add IntRegs:$src1,
+ s11_0ImmPred:$offset)))]>;
+
+let AddedComplexity = 20 in
+def LDriubit_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_0Imm:$offset),
+ "$dst=memub($src1+#$offset)",
+ [(set IntRegs:$dst, (zextloadi1 (add IntRegs:$src1,
+ s11_0ImmPred:$offset)))]>;
+
+// Any-extending byte load printed as memub.
+// NOTE(review): the pattern (extloadi8 ADDRriS11_0) is the same one LDrib_ae
+// matches -- confirm which of the two is expected to be selected.
+def LDriub_ae : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memub($addr)",
+ [(set IntRegs:$dst, (extloadi8 ADDRriS11_0:$addr))]>;
+
+
+// Indexed any-extending byte load printed as memub.
+let AddedComplexity = 20 in
+def LDriub_ae_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_0Imm:$offset),
+ "$dst=memub($src1+#$offset)",
+ [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1,
+ s11_0ImmPred:$offset)))]>;
+
+// Global-address + offset form.  No selection pattern, so mayLoad is stated
+// explicitly.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memub(#$global+$offset)",
+ []>;
+
+// Post-increment form: $dst2 is the updated base register, tied to $src1 by
+// the constraint string.
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriub : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memub($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load unsigned byte conditionally.
+// Predicated memub in three addressing shapes: MEMri operand, explicit
+// register + u6_0Imm offset ("indexed"), and post-increment with an s4_0Imm
+// step.  cPt / cNotPt test the predicate as true / false; the cdn variants
+// print it as "$src1.new".  None has a selection pattern.
+let mayLoad = 1, neverHasSideEffects = 1 in {
+  def LDriub_cPt : LDInst<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, MEMri:$addr),
+      "if ($src1) $dst = memub($addr)",
+      []>;
+
+  def LDriub_cNotPt : LDInst<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, MEMri:$addr),
+      "if (!$src1) $dst = memub($addr)",
+      []>;
+
+  def LDriub_indexed_cPt : LDInst<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+      "if ($src1) $dst = memub($src2+#$src3)",
+      []>;
+
+  def LDriub_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+      "if (!$src1) $dst = memub($src2+#$src3)",
+      []>;
+
+  // Post-increment: $dst2 is the updated base, tied to $src2.
+  let hasCtrlDep = 1 in {
+    def POST_LDriub_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+        (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+        "if ($src1) $dst1 = memub($src2++#$src3)",
+        [],
+        "$src2 = $dst2">;
+
+    def POST_LDriub_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+        (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+        "if (!$src1) $dst1 = memub($src2++#$src3)",
+        [],
+        "$src2 = $dst2">;
+  }
+
+  def LDriub_cdnPt : LDInst<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, MEMri:$addr),
+      "if ($src1.new) $dst = memub($addr)",
+      []>;
+
+  def LDriub_cdnNotPt : LDInst<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, MEMri:$addr),
+      "if (!$src1.new) $dst = memub($addr)",
+      []>;
+
+  def LDriub_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+      "if ($src1.new) $dst = memub($src2+#$src3)",
+      []>;
+
+  def LDriub_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+      "if (!$src1.new) $dst = memub($src2+#$src3)",
+      []>;
+}
+
+// Load unsigned halfword.
+let isPredicable = 1 in
+def LDriuh : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memuh($addr)",
+ [(set IntRegs:$dst, (zextloadi16 ADDRriS11_1:$addr))]>;
+
+// Indexed load unsigned halfword.
+let isPredicable = 1, AddedComplexity = 20 in
+def LDriuh_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_1Imm:$offset),
+ "$dst=memuh($src1+#$offset)",
+ [(set IntRegs:$dst, (zextloadi16 (add IntRegs:$src1,
+ s11_1ImmPred:$offset)))]>;
+
+def LDriuh_ae : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memuh($addr)",
+ [(set IntRegs:$dst, (extloadi16 ADDRriS11_1:$addr))]>;
+
+
+// Indexed load unsigned halfword any-extend.
+let AddedComplexity = 20 in
+def LDriuh_ae_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_1Imm:$offset),
+ "$dst=memuh($src1+#$offset)",
+ [(set IntRegs:$dst, (extloadi16 (add IntRegs:$src1,
+ s11_1ImmPred:$offset)))] >;
+
+// Load unsigned halfword from a global address plus a u16 offset.
+// No selection pattern is attached, so mayLoad is set explicitly.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memuh(#$global+$offset)",
+ []>;
+
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriuh : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memuh($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load unsigned halfword conditionally.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1) $dst = memuh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1) $dst = memuh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_indexed_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if ($src1) $dst = memuh($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if (!$src1) $dst = memuh($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriuh_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if ($src1) $dst1 = memuh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriuh_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if (!$src1) $dst1 = memuh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1.new) $dst = memuh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1.new) $dst = memuh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if ($src1.new) $dst = memuh($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if (!$src1.new) $dst = memuh($src2+#$src3)",
+ []>;
+
+
+// Load word.
+let isPredicable = 1 in
+def LDriw : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr), "$dst = memw($addr)",
+ [(set IntRegs:$dst, (load ADDRriS11_2:$addr))]>;
+
+// Load predicate.
+// Loads a PredRegs value from a MEMri address. The asm string is a
+// placeholder ("Error; should not emit"), so this def is presumably a pseudo
+// that is rewritten before printing - TODO confirm where it is expanded.
+// R10 and R11 are listed as clobbered (Defs).
+let mayLoad = 1, Defs = [R10,R11] in
+def LDriw_pred : LDInst<(outs PredRegs:$dst),
+ (ins MEMri:$addr),
+ "Error; should not emit",
+ []>;
+
+// Indexed load.
+let isPredicable = 1, AddedComplexity = 20 in
+def LDriw_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_2Imm:$offset),
+ "$dst=memw($src1+#$offset)",
+ [(set IntRegs:$dst, (load (add IntRegs:$src1,
+ s11_2ImmPred:$offset)))]>;
+
+// Load word from a global address plus a u16 offset.
+// No selection pattern is attached, so mayLoad is set explicitly.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memw(#$global+$offset)",
+ []>;
+
+// Load word from a global address (no offset operand).
+// No selection pattern is attached, so mayLoad is set explicitly.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDw_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memw(#$global)",
+ []>;
+
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriw : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memw($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load word conditionally.
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1) $dst = memw($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1) $dst = memw($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_indexed_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
+ "if ($src1) $dst=memw($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
+ "if (!$src1) $dst=memw($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriw_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
+ "if ($src1) $dst1 = memw($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriw_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
+ "if (!$src1) $dst1 = memw($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1.new) $dst = memw($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1.new) $dst = memw($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
+ "if ($src1.new) $dst=memw($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
+ "if (!$src1.new) $dst=memw($src2+#$src3)",
+ []>;
+
+// Deallocate stack frame.
+// Modeled as defining R29, R30 and R31 and using R29. The $amt1 operand is
+// carried on the instruction but does not appear in the printed asm string.
+let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in {
+ def DEALLOCFRAME : LDInst<(outs), (ins i32imm:$amt1),
+ "deallocframe",
+ []>;
+}
+
+// Load and unpack bytes to halfwords.
+//===----------------------------------------------------------------------===//
+// LD -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/ALU +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/ALU -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/COMPLEX +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/COMPLEX -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYH +
+//===----------------------------------------------------------------------===//
+// Multiply and use lower result.
+// Rd=+mpyi(Rs,#u8)
+def MPYI_riu : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2),
+ "$dst =+ mpyi($src1, #$src2)",
+ [(set IntRegs:$dst, (mul IntRegs:$src1, u8ImmPred:$src2))]>;
+
+// Rd=-mpyi(Rs,#u8)
+def MPYI_rin : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, n8Imm:$src2),
+ "$dst =- mpyi($src1, #$src2)",
+ [(set IntRegs:$dst,
+ (mul IntRegs:$src1, n8ImmPred:$src2))]>;
+
+// Rd=mpyi(Rs,#m9)
+// s9 is NOT the same as m9 - but it works.. so far.
+// Assembler maps to either Rd=+mpyi(Rs,#u8) or Rd=-mpyi(Rs,#u8)
+// depending on the value of m9. See Arch Spec.
+def MPYI_ri : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Imm:$src2),
+ "$dst = mpyi($src1, #$src2)",
+ [(set IntRegs:$dst, (mul IntRegs:$src1, s9ImmPred:$src2))]>;
+
+// Rd=mpyi(Rs,Rt)
+def MPYI : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpyi($src1, $src2)",
+ [(set IntRegs:$dst, (mul IntRegs:$src1, IntRegs:$src2))]>;
+
+// Rx+=mpyi(Rs,#u8)
+def MPYI_acc_ri : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3),
+ "$dst += mpyi($src2, #$src3)",
+ [(set IntRegs:$dst,
+ (add (mul IntRegs:$src2, u8ImmPred:$src3), IntRegs:$src1))],
+ "$src1 = $dst">;
+
+// Rx+=mpyi(Rs,Rt)
+def MPYI_acc_rr : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst += mpyi($src2, $src3)",
+ [(set IntRegs:$dst,
+ (add (mul IntRegs:$src2, IntRegs:$src3), IntRegs:$src1))],
+ "$src1 = $dst">;
+
+// Rx-=mpyi(Rs,#u8)
+def MPYI_sub_ri : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3),
+ "$dst -= mpyi($src2, #$src3)",
+ [(set IntRegs:$dst,
+ (sub IntRegs:$src1, (mul IntRegs:$src2, u8ImmPred:$src3)))],
+ "$src1 = $dst">;
+
+// Multiply and use upper result.
+// Rd=mpy(Rs,Rt.H):<<1:rnd:sat
+// Rd=mpy(Rs,Rt.L):<<1:rnd:sat
+// Rd=mpy(Rs,Rt)
+def MPY : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpy($src1, $src2)",
+ [(set IntRegs:$dst, (mulhs IntRegs:$src1, IntRegs:$src2))]>;
+
+// Rd=mpy(Rs,Rt):rnd
+// Rd=mpyu(Rs,Rt)
+def MPYU : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpyu($src1, $src2)",
+ [(set IntRegs:$dst, (mulhu IntRegs:$src1, IntRegs:$src2))]>;
+
+// Multiply and use full result.
+// Rdd=mpyu(Rs,Rt)
+def MPYU64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpyu($src1, $src2)",
+ [(set DoubleRegs:$dst, (mul (i64 (anyext IntRegs:$src1)),
+ (i64 (anyext IntRegs:$src2))))]>;
+
+// Rdd=mpy(Rs,Rt)
+def MPY64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpy($src1, $src2)",
+ [(set DoubleRegs:$dst, (mul (i64 (sext IntRegs:$src1)),
+ (i64 (sext IntRegs:$src2))))]>;
+
+
+// Multiply and accumulate, use full result.
+// Rxx[+-]=mpy(Rs,Rt)
+// Rxx+=mpy(Rs,Rt)
+def MPY64_acc : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst += mpy($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (add (mul (i64 (sext IntRegs:$src2)), (i64 (sext IntRegs:$src3))),
+ DoubleRegs:$src1))],
+ "$src1 = $dst">;
+
+// Rxx-=mpy(Rs,Rt)
+def MPY64_sub : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst -= mpy($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (sub DoubleRegs:$src1,
+ (mul (i64 (sext IntRegs:$src2)), (i64 (sext IntRegs:$src3)))))],
+ "$src1 = $dst">;
+
+// Rxx[+-]=mpyu(Rs,Rt)
+// Rxx+=mpyu(Rs,Rt)
+def MPYU64_acc : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ "$dst += mpyu($src2, $src3)",
+ [(set DoubleRegs:$dst, (add (mul (i64 (anyext IntRegs:$src2)),
+ (i64 (anyext IntRegs:$src3))),
+ DoubleRegs:$src1))],"$src1 = $dst">;
+
+// Rxx-=mpyu(Rs,Rt)
+// The pattern subtracts the 64-bit product from the accumulator $src1, which
+// is tied to $dst, so the printed operator has to be "-=" to match it.
+def MPYU64_sub : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst -= mpyu($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (sub DoubleRegs:$src1,
+ (mul (i64 (anyext IntRegs:$src2)),
+ (i64 (anyext IntRegs:$src3)))))],
+ "$src1 = $dst">;
+
+
+// Rx+=add(Rs,Rt)
+// $src1 is the accumulator input and is tied to $dst.
+def ADDrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ "$dst += add($src2, $src3)",
+ [(set IntRegs:$dst, (add (add IntRegs:$src2, IntRegs:$src3),
+ IntRegs:$src1))],
+ "$src1 = $dst">;
+
+// Rx+=add(Rs,#s8)
+// $src1 is the accumulator input and is tied to $dst.
+def ADDri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, s8Imm:$src3),
+ "$dst += add($src2, #$src3)",
+ [(set IntRegs:$dst, (add (add IntRegs:$src2, s8ImmPred:$src3),
+ IntRegs:$src1))],
+ "$src1 = $dst">;
+
+// Rx-=add(Rs,Rt)
+// $src1 is the accumulator input and is tied to $dst.
+def SUBrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ "$dst -= add($src2, $src3)",
+ [(set IntRegs:$dst, (sub IntRegs:$src1, (add IntRegs:$src2,
+ IntRegs:$src3)))],
+ "$src1 = $dst">;
+
+// Rx-=add(Rs,#s8)
+// $src1 is the accumulator input and is tied to $dst.
+def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, s8Imm:$src3),
+ "$dst -= add($src2, #$src3)",
+ [(set IntRegs:$dst, (sub IntRegs:$src1,
+ (add IntRegs:$src2, s8ImmPred:$src3)))],
+ "$src1 = $dst">;
+
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYS +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYS -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/VB +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/VB -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/VH +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/VH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ST +
+//===----------------------------------------------------------------------===//
+///
+/// Assumptions::: ****** DO NOT IGNORE ********
+/// 1. Make sure that in post increment store, the zero'th operand is always the
+/// post increment operand.
+/// 2. Make sure that the store value operand(Rt/Rtt) in a store is always the
+/// last operand.
+///
+// Store doubleword.
+let isPredicable = 1 in
+def STrid : STInst<(outs),
+ (ins MEMri:$addr, DoubleRegs:$src1),
+ "memd($addr) = $src1",
+ [(store DoubleRegs:$src1, ADDRriS11_3:$addr)]>;
+
+// Indexed store double word.
+let AddedComplexity = 10, isPredicable = 1 in
+def STrid_indexed : STInst<(outs),
+ (ins IntRegs:$src1, s11_3Imm:$src2, DoubleRegs:$src3),
+ "memd($src1+#$src2) = $src3",
+ [(store DoubleRegs:$src3,
+ (add IntRegs:$src1, s11_3ImmPred:$src2))]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrid_GP : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, DoubleRegs:$src),
+ "memd(#$global+$offset) = $src",
+ []>;
+
+let hasCtrlDep = 1, isPredicable = 1 in
+def POST_STdri : STInstPI<(outs IntRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, s4Imm:$offset),
+ "memd($src2++#$offset) = $src1",
+ [(set IntRegs:$dst,
+ (post_store DoubleRegs:$src1, IntRegs:$src2, s4_3ImmPred:$offset))],
+ "$src2 = $dst">;
+
+// Store doubleword conditionally.
+// if ([!]Pv) memd(Rs+#u6:3)=Rtt
+// if (Pv) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_cPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
+ "if ($src1) memd($addr) = $src2",
+ []>;
+
+// if (!Pv) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
+ "if (!$src1) memd($addr) = $src2",
+ []>;
+
+// if (Pv) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_cPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
+ DoubleRegs:$src4),
+ "if ($src1) memd($src2+#$src3) = $src4",
+ []>;
+
+// if (!Pv) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
+ DoubleRegs:$src4),
+ "if (!$src1) memd($src2+#$src3) = $src4",
+ []>;
+
+// if ([!]Pv) memd(Rx++#s4:3)=Rtt
+// if (Pv) memd(Rx++#s4:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def POST_STdri_cPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
+ s4_3Imm:$offset),
+ "if ($src1) memd($src3++#$offset) = $src2",
+ [],
+ "$src3 = $dst">;
+
+// if (!Pv) memd(Rx++#s4:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def POST_STdri_cNotPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
+ s4_3Imm:$offset),
+ "if (!$src1) memd($src3++#$offset) = $src2",
+ [],
+ "$src3 = $dst">;
+
+
+// Store byte.
+// memb(Rs+#s11:0)=Rt
+let isPredicable = 1 in
+def STrib : STInst<(outs),
+ (ins MEMri:$addr, IntRegs:$src1),
+ "memb($addr) = $src1",
+ [(truncstorei8 IntRegs:$src1, ADDRriS11_0:$addr)]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def STrib_indexed : STInst<(outs),
+ (ins IntRegs:$src1, s11_0Imm:$src2, IntRegs:$src3),
+ "memb($src1+#$src2) = $src3",
+ [(truncstorei8 IntRegs:$src3, (add IntRegs:$src1,
+ s11_0ImmPred:$src2))]>;
+
+// memb(gp+#u16:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_GP : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memb(#$global+$offset) = $src",
+ []>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STb_GP : STInst<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memb(#$global) = $src",
+ []>;
+
+// memb(Rx++#s4:0)=Rt
+let hasCtrlDep = 1, isPredicable = 1 in
+def POST_STbri : STInstPI<(outs IntRegs:$dst), (ins IntRegs:$src1,
+ IntRegs:$src2,
+ s4Imm:$offset),
+ "memb($src2++#$offset) = $src1",
+ [(set IntRegs:$dst,
+ (post_truncsti8 IntRegs:$src1, IntRegs:$src2,
+ s4_0ImmPred:$offset))],
+ "$src2 = $dst">;
+
+// Store byte conditionally.
+// if ([!]Pv) memb(Rs+#u6:0)=Rt
+// if (Pv) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memb($addr) = $src2",
+ []>;
+
+// if (!Pv) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memb($addr) = $src2",
+ []>;
+
+// if (Pv) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if ($src1) memb($src2+#$src3) = $src4",
+ []>;
+
+// if (!Pv) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memb($src2+#$src3) = $src4",
+ []>;
+
+// if ([!]Pv) memb(Rx++#s4:0)=Rt
+// if (Pv) memb(Rx++#s4:0)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STbri_cPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if ($src1) memb($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+// if (!Pv) memb(Rx++#s4:0)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STbri_cNotPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if (!$src1) memb($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+
+// Store halfword.
+// memh(Rs+#s11:1)=Rt
+let isPredicable = 1 in
+def STrih : STInst<(outs),
+ (ins MEMri:$addr, IntRegs:$src1),
+ "memh($addr) = $src1",
+ [(truncstorei16 IntRegs:$src1, ADDRriS11_1:$addr)]>;
+
+
+let AddedComplexity = 10, isPredicable = 1 in
+def STrih_indexed : STInst<(outs),
+ (ins IntRegs:$src1, s11_1Imm:$src2, IntRegs:$src3),
+ "memh($src1+#$src2) = $src3",
+ [(truncstorei16 IntRegs:$src3, (add IntRegs:$src1,
+ s11_1ImmPred:$src2))]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_GP : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memh(#$global+$offset) = $src",
+ []>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STh_GP : STInst<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memh(#$global) = $src",
+ []>;
+
+// memh(Rx++#s4:1)=Rt.H
+// memh(Rx++#s4:1)=Rt
+let hasCtrlDep = 1, isPredicable = 1 in
+def POST_SThri : STInstPI<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset),
+ "memh($src2++#$offset) = $src1",
+ [(set IntRegs:$dst,
+ (post_truncsti16 IntRegs:$src1, IntRegs:$src2,
+ s4_1ImmPred:$offset))],
+ "$src2 = $dst">;
+
+// Store halfword conditionally.
+// if ([!]Pv) memh(Rs+#u6:1)=Rt
+// if (Pv) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memh($addr) = $src2",
+ []>;
+
+// if (!Pv) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memh($addr) = $src2",
+ []>;
+
+// if (Pv) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if ($src1) memh($src2+#$src3) = $src4",
+ []>;
+
+// if (!Pv) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memh($src2+#$src3) = $src4",
+ []>;
+
+// if ([!]Pv) memh(Rx++#s4:1)=Rt
+// if (Pv) memh(Rx++#s4:1)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if ($src1) memh($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+// if (!Pv) memh(Rx++#s4:1)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cNotPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if (!$src1) memh($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+
+// Store word.
+// Store predicate.
+// Stores a PredRegs value to a MEMri address. The asm string is a
+// placeholder ("Error; should not emit"), so this def is presumably a pseudo
+// that is rewritten before printing - TODO confirm where it is expanded.
+// R10 and R11 are listed as clobbered (Defs).
+// NOTE(review): there is no pattern and mayStore is not set here, whereas
+// LDriw_pred sets mayLoad explicitly - confirm this is intended.
+let Defs = [R10,R11] in
+def STriw_pred : STInst<(outs),
+ (ins MEMri:$addr, PredRegs:$src1),
+ "Error; should not emit",
+ []>;
+
+// memw(Rs+#s11:2)=Rt
+let isPredicable = 1 in
+def STriw : STInst<(outs),
+ (ins MEMri:$addr, IntRegs:$src1),
+ "memw($addr) = $src1",
+ [(store IntRegs:$src1, ADDRriS11_2:$addr)]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def STriw_indexed : STInst<(outs),
+ (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3),
+ "memw($src1+#$src2) = $src3",
+ [(store IntRegs:$src3, (add IntRegs:$src1, s11_2ImmPred:$src2))]>;
+
+// Truncating word store: selects truncstorei32 of a DoubleRegs value and
+// prints it with the memw form.
+// NOTE(review): memw is printed with a DoubleRegs operand - confirm the
+// assembler accepts this.
+def STriwt : STInst<(outs),
+ (ins MEMri:$addr, DoubleRegs:$src1),
+ "memw($addr) = $src1",
+ [(truncstorei32 DoubleRegs:$src1, ADDRriS11_2:$addr)]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_GP : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memw(#$global+$offset) = $src",
+ []>;
+
+let hasCtrlDep = 1, isPredicable = 1 in
+def POST_STwri : STInstPI<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset),
+ "memw($src2++#$offset) = $src1",
+ [(set IntRegs:$dst,
+ (post_store IntRegs:$src1, IntRegs:$src2, s4_2ImmPred:$offset))],
+ "$src2 = $dst">;
+
+// Store word conditionally.
+// if ([!]Pv) memw(Rs+#u6:2)=Rt
+// if (Pv) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memw($addr) = $src2",
+ []>;
+
+// if (!Pv) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memw($addr) = $src2",
+ []>;
+
+// if (Pv) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if ($src1) memw($src2+#$src3) = $src4",
+ []>;
+
+// if (!Pv) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memw($src2+#$src3) = $src4",
+ []>;
+
+// if ([!]Pv) memw(Rx++#s4:2)=Rt
+// if (Pv) memw(Rx++#s4:2)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STwri_cPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if ($src1) memw($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+// if (!Pv) memw(Rx++#s4:2)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STwri_cNotPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if (!$src1) memw($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+
+
+// Allocate stack frame.
+// Modeled as defining R29 and R30 and using R31 and R30; $amt is printed as
+// the immediate argument of allocframe.
+let Defs = [R29, R30], Uses = [R31, R30], neverHasSideEffects = 1 in {
+ def ALLOCFRAME : STInst<(outs),
+ (ins i32imm:$amt),
+ "allocframe(#$amt)",
+ []>;
+}
+//===----------------------------------------------------------------------===//
+// ST -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/ALU +
+//===----------------------------------------------------------------------===//
+// Logical NOT.
+def NOT_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
+ "$dst = not($src1)",
+ [(set DoubleRegs:$dst, (not DoubleRegs:$src1))]>;
+
+
+// Sign extend word to doubleword.
+def SXTW : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
+ "$dst = sxtw($src1)",
+ [(set DoubleRegs:$dst, (sext IntRegs:$src1))]>;
+//===----------------------------------------------------------------------===//
+// STYPE/ALU -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/BIT +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/BIT -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// STYPE/COMPLEX +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/COMPLEX -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/PERM +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/PERM -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/PRED +
+//===----------------------------------------------------------------------===//
+// Predicate transfer.
+// Predicate register to integer register. No selection pattern is attached.
+let neverHasSideEffects = 1 in
+def TFR_RsPd : SInst<(outs IntRegs:$dst), (ins PredRegs:$src1),
+ "$dst = $src1 // Should almost never emit this",
+ []>;
+
+// Integer register to predicate register; selected for a trunc of an
+// IntRegs value into PredRegs.
+def TFR_PdRs : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1),
+ "$dst = $src1 // Should almost never emit!",
+ [(set PredRegs:$dst, (trunc IntRegs:$src1))]>;
+//===----------------------------------------------------------------------===//
+// STYPE/PRED -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/SHIFT +
+//===----------------------------------------------------------------------===//
+// Shift by immediate.
+def ASR_ri : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = asr($src1, #$src2)",
+ [(set IntRegs:$dst, (sra IntRegs:$src1, u5ImmPred:$src2))]>;
+
+def ASRd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+ "$dst = asr($src1, #$src2)",
+ [(set DoubleRegs:$dst, (sra DoubleRegs:$src1, u6ImmPred:$src2))]>;
+
+def ASL : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = asl($src1, #$src2)",
+ [(set IntRegs:$dst, (shl IntRegs:$src1, u5ImmPred:$src2))]>;
+
+def LSR_ri : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = lsr($src1, #$src2)",
+ [(set IntRegs:$dst, (srl IntRegs:$src1, u5ImmPred:$src2))]>;
+
+def LSRd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+ "$dst = lsr($src1, #$src2)",
+ [(set DoubleRegs:$dst, (srl DoubleRegs:$src1, u6ImmPred:$src2))]>;
+
+// Logical shift right by immediate and accumulate (doubleword).
+// $src1 is the accumulator input and is tied to $dst.
+def LSRd_ri_acc : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2,
+ u6Imm:$src3),
+ "$dst += lsr($src2, #$src3)",
+ [(set DoubleRegs:$dst, (add DoubleRegs:$src1,
+ (srl DoubleRegs:$src2,
+ u6ImmPred:$src3)))],
+ "$src1 = $dst">;
+
+// Shift by register and accumulate.
+// The shift amount $src3 is a register operand. No selection pattern is
+// attached; $src1 is the accumulator input and is tied to $dst.
+def ASR_rr_acc : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1,
+ IntRegs:$src2,
+ IntRegs:$src3),
+ "$dst += asr($src2, $src3)",
+ [], "$src1 = $dst">;
+
+// Shift by immediate and add.
+def ADDASL : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ u3Imm:$src3),
+ "$dst = addasl($src1, $src2, #$src3)",
+ [(set IntRegs:$dst, (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u3ImmPred:$src3)))]>;
+
+// Shift by register.
+def ASL_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = asl($src1, $src2)",
+ [(set IntRegs:$dst, (shl IntRegs:$src1, IntRegs:$src2))]>;
+
+def ASR_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = asr($src1, $src2)",
+ [(set IntRegs:$dst, (sra IntRegs:$src1, IntRegs:$src2))]>;
+
+
+def LSR_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = lsr($src1, $src2)",
+ [(set IntRegs:$dst, (srl IntRegs:$src1, IntRegs:$src2))]>;
+
+def LSLd : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ "$dst = lsl($src1, $src2)",
+ [(set DoubleRegs:$dst, (shl DoubleRegs:$src1, IntRegs:$src2))]>;
+
+def ASRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ IntRegs:$src2),
+ "$dst = asr($src1, $src2)",
+ [(set DoubleRegs:$dst, (sra DoubleRegs:$src1, IntRegs:$src2))]>;
+
+def LSRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ IntRegs:$src2),
+ "$dst = lsr($src1, $src2)",
+ [(set DoubleRegs:$dst, (srl DoubleRegs:$src1, IntRegs:$src2))]>;
+
+//===----------------------------------------------------------------------===//
+// STYPE/SHIFT -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/VH +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/VH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/VW +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/VW -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SYSTEM/SUPER +
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SYSTEM/USER +
+//===----------------------------------------------------------------------===//
+// Barrier. The HexagonISD::BARRIER node has no results and no operands and
+// carries only a chain (SDNPHasChain).
+def SDHexagonBARRIER: SDTypeProfile<0, 0, []>;
+def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDHexagonBARRIER,
+ [SDNPHasChain]>;
+
+// Selected directly from the HexagonBARRIER node; marked as having side
+// effects.
+let hasSideEffects = 1 in
+def BARRIER : STInst<(outs), (ins),
+ "barrier",
+ [(HexagonBARRIER)]>;
+
+//===----------------------------------------------------------------------===//
+// SYSTEM/SUPER -
+//===----------------------------------------------------------------------===//
+
+// TFRI64 - assembly mapped.
+let isReMaterializable = 1 in
+def TFRI64 : ALU64_rr<(outs DoubleRegs:$dst), (ins s8Imm64:$src1),
+ "$dst = #$src1",
+ [(set DoubleRegs:$dst, s8Imm64Pred:$src1)]>;
+
+// Pseudo instruction to encode a set of conditional transfers.
+// This instruction is used instead of a mux and trades-off codesize
+// for performance. We conduct this transformation optimistically in
+// the hope that these instructions get promoted to dot-new transfers.
+let AddedComplexity = 100 in
+def TFR_condset_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ IntRegs:$src2,
+ IntRegs:$src3),
+ "Error; should not emit",
+ [(set IntRegs:$dst, (select PredRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+let AddedComplexity = 100 in
+def TFR_condset_ii : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, s12Imm:$src2, s12Imm:$src3),
+ "Error; should not emit",
+ [(set IntRegs:$dst, (select PredRegs:$src1,
+ s12ImmPred:$src2,
+ s12ImmPred:$src3))]>;
+
+// Generate frameindex addresses.
+let isReMaterializable = 1 in
+def TFR_FI : ALU32_ri<(outs IntRegs:$dst), (ins FrameIndex:$src1),
+ "$dst = add($src1)",
+ [(set IntRegs:$dst, ADDRri:$src1)]>;
+
+//
+// CR - Type.
+//
+// loop0 with a branch-target operand and an unsigned 10-bit immediate.
+// Declared to define SA0 and LC0 (presumably the loop start address and loop
+// count registers -- confirm against the Hexagon architecture manual).
+// The pattern list is empty, so no DAG pattern selects this instruction.
+let neverHasSideEffects = 1, Defs = [SA0, LC0] in {
+def LOOP0_i : CRInst<(outs), (ins brtarget:$offset, u10Imm:$src2),
+ "loop0($offset, #$src2)",
+ []>;
+}
+
+// Register form of loop0: the second operand comes from an IntRegs register.
+// Same Defs as LOOP0_i and likewise has an empty pattern list.
+let neverHasSideEffects = 1, Defs = [SA0, LC0] in {
+def LOOP0_r : CRInst<(outs), (ins brtarget:$offset, IntRegs:$src2),
+ "loop0($offset, $src2)",
+ []>;
+}
+
+// Loop end, printed as ":endloop0". Modeled as a terminating branch that
+// defines PC and LC0 and reads SA0 and LC0. No selection pattern.
+let isBranch = 1, isTerminator = 1, neverHasSideEffects = 1,
+ Defs = [PC, LC0], Uses = [SA0, LC0] in {
+def ENDLOOP0 : CRInst<(outs), (ins brtarget:$offset),
+ ":endloop0",
+ []>;
+}
+
+// Support for generating global address.
+// Taken from X86InstrInfo.td.
+def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<0>]>;
+def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>;
+def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>;
+
+// This pattern is incorrect. When we add small data, we should change
+// this pattern to use memw(#foo).
+let isMoveImm = 1 in
+def CONST32 : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst = CONST32(#$global)",
+ [(set IntRegs:$dst,
+ (load (HexagonCONST32 tglobaltlsaddr:$global)))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_set : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst = CONST32(#$global)",
+ [(set IntRegs:$dst,
+ (HexagonCONST32 tglobaladdr:$global))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_set_jt : LDInst<(outs IntRegs:$dst), (ins jumptablebase:$jt),
+ "$dst = CONST32(#$jt)",
+ [(set IntRegs:$dst,
+ (HexagonCONST32 tjumptable:$jt))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32GP_set : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst = CONST32(#$global)",
+ [(set IntRegs:$dst,
+ (HexagonCONST32_GP tglobaladdr:$global))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_Int_Real : LDInst<(outs IntRegs:$dst), (ins i32imm:$global),
+ "$dst = CONST32(#$global)",
+ [(set IntRegs:$dst, imm:$global) ]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_Label : LDInst<(outs IntRegs:$dst), (ins bblabel:$label),
+ "$dst = CONST32($label)",
+ [(set IntRegs:$dst, (HexagonCONST32 bbl:$label))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST64_Int_Real : LDInst<(outs DoubleRegs:$dst), (ins i64imm:$global),
+ "$dst = CONST64(#$global)",
+ [(set DoubleRegs:$dst, imm:$global) ]>;
+
+// Set a predicate register to the constant 0 (false). Takes no inputs and is
+// printed as an xor of the destination with itself.
+def TFR_PdFalse : SInst<(outs PredRegs:$dst), (ins),
+ "$dst = xor($dst, $dst)",
+ [(set PredRegs:$dst, 0)]>;
+
+// High word of a signed 32x32 multiply. Matches the DAG shape
+// trunc((sext(a) * sext(b)) >> 32): both i32 inputs are sign-extended to i64,
+// multiplied, shifted right logically by 32 and truncated back to i32.
+def MPY_trsext : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpy($src1, $src2)",
+ [(set IntRegs:$dst,
+ (trunc (i64 (srl (i64 (mul (i64 (sext IntRegs:$src1)),
+ (i64 (sext IntRegs:$src2)))),
+ (i32 32)))))]>;
+
+// Pseudo instructions.
+def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+
+def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+
+def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+
+def call : SDNode<"HexagonISD::CALL", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
+
+// For tailcalls a HexagonTCRet SDNode has 3 SDNode Properties - a chain,
+// Optional Flag and Variable Arguments.
+// Its 1 Operand has pointer type.
+def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+// Pseudo selected from callseq_start (ISD::CALLSEQ_START) with one immediate
+// amount. Its asm string states that it should never reach the printer.
+// Declared to define R29/R30 and to read R31/R30/R29.
+let Defs = [R29, R30], Uses = [R31, R30, R29] in {
+ def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
+ "Should never be emitted",
+ [(callseq_start timm:$amt)]>;
+}
+
+// Pseudo selected from callseq_end (ISD::CALLSEQ_END) with two immediate
+// amounts. Declared to define R29/R30/R31 and to read R29.
+let Defs = [R29, R30, R31], Uses = [R29] in {
+ def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "Should never be emitted",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+// Call subroutine.
+let isCall = 1, neverHasSideEffects = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def CALL : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "call $dst", []>;
+}
+
+// Call subroutine from register.
+let isCall = 1, neverHasSideEffects = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def CALLR : JRInst<(outs), (ins IntRegs:$dst, variable_ops),
+ "callr $dst",
+ []>;
+ }
+
+// Tail Calls.
+// Direct tail call, used by the HexagonTCRet pattern for tglobaladdr targets.
+// Printed as a plain jump; flagged as call, return, terminator and barrier.
+// No selection pattern of its own (empty pattern list).
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def TCRETURNtg : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "jump $dst // TAILCALL", []>;
+}
+// Direct tail call, used by the HexagonTCRet pattern for texternalsym
+// targets. Same flags, Defs and asm string as TCRETURNtg.
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def TCRETURNtext : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "jump $dst // TAILCALL", []>;
+}
+
+// Indirect tail call through an IntRegs register, used by the HexagonTCRet
+// pattern for register targets. Printed as jumpr.
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def TCRETURNR : JInst<(outs), (ins IntRegs:$dst, variable_ops),
+ "jumpr $dst // TAILCALL", []>;
+}
+// Map call instruction.
+def : Pat<(call IntRegs:$dst),
+ (CALLR IntRegs:$dst)>, Requires<[HasV2TOnly]>;
+def : Pat<(call tglobaladdr:$dst),
+ (CALL tglobaladdr:$dst)>, Requires<[HasV2TOnly]>;
+def : Pat<(call texternalsym:$dst),
+ (CALL texternalsym:$dst)>, Requires<[HasV2TOnly]>;
+//Tail calls.
+def : Pat<(HexagonTCRet tglobaladdr:$dst),
+ (TCRETURNtg tglobaladdr:$dst)>;
+def : Pat<(HexagonTCRet texternalsym:$dst),
+ (TCRETURNtext texternalsym:$dst)>;
+def : Pat<(HexagonTCRet IntRegs:$dst),
+ (TCRETURNR IntRegs:$dst)>;
+
+// Map from r0 = and(r1, 65535) to r0 = zxth(r1).
+def : Pat <(and IntRegs:$src1, 65535),
+ (ZXTH IntRegs:$src1)>;
+
+// Map from r0 = and(r1, 255) to r0 = zxtb(r1).
+def : Pat <(and IntRegs:$src1, 255),
+ (ZXTB IntRegs:$src1)>;
+
+// Map Add(p1, true) to p1 = not(p1).
+// Add(p1, false) should never be produced;
+// if it is, it has to be mapped to a NOOP.
+def : Pat <(add PredRegs:$src1, -1),
+ (NOT_pp PredRegs:$src1)>;
+
+// Map from p0 = setlt(r0, r1) r2 = mux(p0, r3, r4) =>
+// p0 = cmp.lt(r0, r1), r0 = mux(p0, r2, r1).
+def : Pat <(select (i1 (setlt IntRegs:$src1, IntRegs:$src2)), IntRegs:$src3,
+ IntRegs:$src4),
+ (TFR_condset_rr (CMPLTrr IntRegs:$src1, IntRegs:$src2), IntRegs:$src4,
+ IntRegs:$src3)>, Requires<[HasV2TOnly]>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
+def : Pat <(select (not PredRegs:$src1), s8ImmPred:$src2, s8ImmPred:$src3),
+ (TFR_condset_ii PredRegs:$src1, s8ImmPred:$src3, s8ImmPred:$src2)>;
+
+// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump.
+def : Pat <(brcond (not PredRegs:$src1), bb:$offset),
+ (JMP_PredNot PredRegs:$src1, bb:$offset)>;
+
+// Map from p2 = pnot(p2); p1 = and(p0, p2) => p1 = and(p0, !p2).
+def : Pat <(and PredRegs:$src1, (not PredRegs:$src2)),
+ (AND_pnotp PredRegs:$src1, PredRegs:$src2)>;
+
+// Map from store(globaladdress + x) -> memd(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(store DoubleRegs:$src1,
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STrid_GP tglobaladdr:$global, u16ImmPred:$offset, DoubleRegs:$src1)>;
+
+// Map from store(globaladdress) -> memd(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(store DoubleRegs:$src1, (HexagonCONST32_GP tglobaladdr:$global)),
+ (STrid_GP tglobaladdr:$global, 0, DoubleRegs:$src1)>;
+
+// Map from store(globaladdress + x) -> memw(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(store IntRegs:$src1, (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STriw_GP tglobaladdr:$global, u16ImmPred:$offset, IntRegs:$src1)>;
+
+// Map from store(globaladdress) -> memw(#foo + 0).
+// A 32-bit store whose address is a bare GP-relative global (no offset) is
+// selected as STriw_GP with an explicit zero offset. AddedComplexity raises
+// the priority of this pattern during selection.
+let AddedComplexity = 100 in
+def : Pat <(store IntRegs:$src1, (HexagonCONST32_GP tglobaladdr:$global)),
+ (STriw_GP tglobaladdr:$global, 0, IntRegs:$src1)>;
+
+// Map from store(globaladdress + x) -> memh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei16 IntRegs:$src1,
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STrih_GP tglobaladdr:$global, u16ImmPred:$offset, IntRegs:$src1)>;
+
+// Map from store(globaladdress) -> memh(#foo).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei16 IntRegs:$src1,
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STh_GP tglobaladdr:$global, IntRegs:$src1)>;
+
+// Map from store(globaladdress + x) -> memb(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei8 IntRegs:$src1,
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STrib_GP tglobaladdr:$global, u16ImmPred:$offset, IntRegs:$src1)>;
+
+// Map from store(globaladdress) -> memb(#foo).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei8 IntRegs:$src1,
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STb_GP tglobaladdr:$global, IntRegs:$src1)>;
+
+// Map from load(globaladdress + x) -> memw(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(load (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDriw_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memw(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(load (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDw_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress + x) -> memd(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(i64 (load (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (LDrid_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memd(#foo).
+let AddedComplexity = 100 in
+def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (LDd_GP tglobaladdr:$global)>;
+
+
+// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress + 0), Pd = Rd.
+let AddedComplexity = 100 in
+def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (TFR_PdRs (LDrib_GP tglobaladdr:$global, 0))>;
+
+// Map from load(globaladdress + x) -> memh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDrih_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memh(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDrih_GP tglobaladdr:$global, 0)>;
+
+// Map from load(globaladdress + x) -> memuh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memuh(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDriuh_GP tglobaladdr:$global, 0)>;
+
+// Map from load(globaladdress + x) -> memuh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(extloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memuh(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(extloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDriuh_GP tglobaladdr:$global, 0)>;
+// Map from load(globaladdress + x) -> memub(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDriub_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memub(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDriub_GP tglobaladdr:$global, 0)>;
+
+// Map from load(globaladdress + x) -> memb(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDrib_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memb(#foo).
+let AddedComplexity = 100 in
+def : Pat <(extloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDb_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memb(#foo).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDb_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memub(#foo).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDub_GP tglobaladdr:$global)>;
+
+// When the Interprocedural Global Variable optimizer realizes that a
+// certain global variable takes only two constant values, it shrinks the
+// global to a boolean. Catch those loads here in the following 3 patterns.
+let AddedComplexity = 100 in
+def : Pat <(extloadi1 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDb_GP tglobaladdr:$global)>;
+
+let AddedComplexity = 100 in
+def : Pat <(sextloadi1 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDb_GP tglobaladdr:$global)>;
+
+let AddedComplexity = 100 in
+def : Pat <(zextloadi1 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDub_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memh(#foo).
+let AddedComplexity = 100 in
+def : Pat <(extloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDh_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memh(#foo).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDh_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memuh(#foo).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDuh_GP tglobaladdr:$global)>;
+
+// Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned.
+def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)),
+ (AND_rr (LDrib ADDRriS11_0:$addr), (TFRI 0x1))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = SXTW(Rss.lo).
+def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i32)),
+ (i64 (SXTW (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg)))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = SXTW(SXTH(Rss.lo)).
+def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i16)),
+ (i64 (SXTW (SXTH (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg))))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = SXTW(SXTB(Rss.lo)).
+def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i8)),
+ (i64 (SXTW (SXTB (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg))))>;
+
+// We want to prevent emitting pnot's as much as possible.
+// Map brcond with an unsupported setcc to a JMP_PredNot.
+def : Pat <(brcond (i1 (setne IntRegs:$src1, IntRegs:$src2)), bb:$offset),
+ (JMP_PredNot (CMPEQrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setne IntRegs:$src1, s10ImmPred:$src2)), bb:$offset),
+ (JMP_PredNot (CMPEQri IntRegs:$src1, s10ImmPred:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setne PredRegs:$src1, (i1 -1))), bb:$offset),
+ (JMP_PredNot PredRegs:$src1, bb:$offset)>;
+
+def : Pat <(brcond (i1 (setne PredRegs:$src1, (i1 0))), bb:$offset),
+ (JMP_Pred PredRegs:$src1, bb:$offset)>;
+
+def : Pat <(brcond (i1 (setlt IntRegs:$src1, s8ImmPred:$src2)), bb:$offset),
+ (JMP_PredNot (CMPGEri IntRegs:$src1, s8ImmPred:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setlt IntRegs:$src1, IntRegs:$src2)), bb:$offset),
+ (JMP_Pred (CMPLTrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setuge DoubleRegs:$src1, DoubleRegs:$src2)),
+ bb:$offset),
+ (JMP_PredNot (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1),
+ bb:$offset)>;
+
+def : Pat <(brcond (i1 (setule IntRegs:$src1, IntRegs:$src2)), bb:$offset),
+ (JMP_PredNot (CMPGTUrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setule DoubleRegs:$src1, DoubleRegs:$src2)),
+ bb:$offset),
+ (JMP_PredNot (CMPGTU64rr DoubleRegs:$src1, DoubleRegs:$src2),
+ bb:$offset)>;
+
+// Map from a 64-bit select to an emulated 64-bit mux.
+// Hexagon does not support 64-bit MUXes; so emulate with combines.
+def : Pat <(select PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (COMBINE_rr
+ (MUX_rr PredRegs:$src1,
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src3, subreg_hireg)),
+ (MUX_rr PredRegs:$src1,
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src3, subreg_loreg)))>;
+
+// Map from a 1-bit select to logical ops.
+// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3).
+def : Pat <(select PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (OR_pp (AND_pp PredRegs:$src1, PredRegs:$src2),
+ (AND_pp (NOT_pp PredRegs:$src1), PredRegs:$src3))>;
+
+// Map Pd = load(addr) -> Rs = load(addr); Pd = Rs.
+def : Pat<(i1 (load ADDRriS11_2:$addr)),
+ (i1 (TFR_PdRs (i32 (LDrib ADDRriS11_2:$addr))))>;
+
+// Map for truncating from 64-bit values to 32-bit values (low subregister).
+def : Pat<(i32 (trunc DoubleRegs:$src)),
+ (i32 (EXTRACT_SUBREG DoubleRegs:$src, subreg_loreg))>;
+
+// Map for truncating from i64 values to i1 (low subregister -> predicate).
+def : Pat<(i1 (trunc DoubleRegs:$src)),
+ (i1 (TFR_PdRs (i32(EXTRACT_SUBREG DoubleRegs:$src, subreg_loreg))))>;
+
+// Map memb(Rs) = Rdd -> memb(Rs) = Rt.
+def : Pat<(truncstorei8 DoubleRegs:$src, ADDRriS11_0:$addr),
+ (STrib ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG DoubleRegs:$src,
+ subreg_loreg)))>;
+
+// Map memh(Rs) = Rdd -> memh(Rs) = Rt.
+def : Pat<(truncstorei16 DoubleRegs:$src, ADDRriS11_0:$addr),
+ (STrih ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG DoubleRegs:$src,
+ subreg_loreg)))>;
+
+// Map from i1 = constant<-1>; memb(addr) = i1 -> r0 = 1; memb(addr) = r0.
+// Storing the constant "true" predicate is done as a byte store (STrib) of
+// the value 1 materialized with TFRI.
+def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
+ (STrib ADDRriS11_2:$addr, (TFRI 1))>;
+
+let AddedComplexity = 100 in
+// Map from i1 = constant<-1>; memb(CONST32(#foo)) = i1 -> r0 = 1;
+// memb(#foo) = r0
+// Same as above for a GP-relative global address, using the byte store
+// STb_GP.
+def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (STb_GP tglobaladdr:$global, (TFRI 1))>;
+
+// Map from memb(Rs) = Pd -> Rt = mux(Pd, #1, #0); store Rt.
+def : Pat<(store PredRegs:$src1, ADDRriS11_2:$addr),
+ (STrib ADDRriS11_2:$addr, (i32 (MUX_ii PredRegs:$src1, 1, 0)) )>;
+
+// Map Rdd = anyext(Rs) -> Rdd = sxtw(Rs).
+// Hexagon_TODO: We can probably use combine but that will cost 2 instructions.
+// Better way to do this?
+def : Pat<(i64 (anyext IntRegs:$src1)),
+ (i64 (SXTW IntRegs:$src1))>;
+
+// Map cmple -> cmpgt.
+// rs <= rt -> !(rs > rt).
+def : Pat<(i1 (setle IntRegs:$src1, s10ImmPred:$src2)),
+ (i1 (NOT_Ps (CMPGTri IntRegs:$src1, s10ImmPred:$src2)))>;
+
+// rs <= rt -> !(rs > rt).
+def : Pat<(i1 (setle IntRegs:$src1, IntRegs:$src2)),
+ (i1 (NOT_Ps (CMPGTrr IntRegs:$src1, IntRegs:$src2)))>;
+
+// Rss <= Rtt -> !(Rss > Rtt).
+def : Pat<(i1 (setle DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (NOT_Ps (CMPGT64rr DoubleRegs:$src1, DoubleRegs:$src2)))>;
+
+// Map cmpne -> cmpeq.
+// Hexagon_TODO: We should improve on this.
+// rs != rt -> !(rs == rt).
+def : Pat <(i1 (setne IntRegs:$src1, s10ImmPred:$src2)),
+ (i1 (NOT_Ps(i1 (CMPEQri IntRegs:$src1, s10ImmPred:$src2))))>;
+
+// Map cmpne(Rs) -> !cmpeq(Rs).
+// rs != rt -> !(rs == rt).
+def : Pat <(i1 (setne IntRegs:$src1, IntRegs:$src2)),
+ (i1 (NOT_Ps(i1 (CMPEQrr IntRegs:$src1, IntRegs:$src2))))>;
+
+// Convert setne back to xor for hexagon since we compute w/ pred registers.
+def : Pat <(i1 (setne PredRegs:$src1, PredRegs:$src2)),
+ (i1 (XOR_pp PredRegs:$src1, PredRegs:$src2))>;
+
+// Map cmpne(Rss) -> !cmpeq(Rss).
+// rs != rt -> !(rs == rt).
+def : Pat <(i1 (setne DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (NOT_Ps(i1 (CMPEHexagon4rr DoubleRegs:$src1, DoubleRegs:$src2))))>;
+
+// Map cmpge(Rs, Rt) -> !cmpgt(Rt, Rs).
+// rs >= rt -> !(rt > rs).
+def : Pat <(i1 (setge IntRegs:$src1, IntRegs:$src2)),
+ (i1 (NOT_Ps(i1 (CMPGTrr IntRegs:$src2, IntRegs:$src1))))>;
+
+def : Pat <(i1 (setge IntRegs:$src1, s8ImmPred:$src2)),
+ (i1 (CMPGEri IntRegs:$src1, s8ImmPred:$src2))>;
+
+// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss).
+// rss >= rtt -> !(rtt > rss).
+def : Pat <(i1 (setge DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (NOT_Ps(i1 (CMPGT64rr DoubleRegs:$src2, DoubleRegs:$src1))))>;
+
+// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm).
+// rs < rt -> !(rs >= rt).
+def : Pat <(i1 (setlt IntRegs:$src1, s8ImmPred:$src2)),
+ (i1 (NOT_Ps (CMPGEri IntRegs:$src1, s8ImmPred:$src2)))>;
+
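+// Map cmplt(Rs, Rt) -> cmplt(Rs, Rt).
+// rs < rt -> rs < rt. Let assembler map it.
+// NOTE(review): the operands below are swapped ($src2, $src1), unlike this
+// comment and the brcond setlt pattern -- confirm which order is intended.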
+def : Pat <(i1 (setlt IntRegs:$src1, IntRegs:$src2)),
+ (i1 (CMPLTrr IntRegs:$src2, IntRegs:$src1))>;
+
+// Map cmplt(Rss, Rtt) -> cmpgt(Rtt, Rss).
+// rss < rtt -> (rtt > rss).
+def : Pat <(i1 (setlt DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (CMPGT64rr DoubleRegs:$src2, DoubleRegs:$src1))>;
+
+// Map from cmpltu(Rs, Rd) -> cmpgtu(Rd, Rs).
+// rs < rt -> rt > rs.
+def : Pat <(i1 (setult IntRegs:$src1, IntRegs:$src2)),
+ (i1 (CMPGTUrr IntRegs:$src2, IntRegs:$src1))>;
+
+// Map from cmpltu(Rss, Rdd) -> cmpgtu(Rdd, Rss).
+// rs < rt -> rt > rs.
+def : Pat <(i1 (setult DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1))>;
+
+// Map from Rs >= Rt -> !(Rt > Rs).
+// rs >= rt -> !(rt > rs).
+def : Pat <(i1 (setuge IntRegs:$src1, IntRegs:$src2)),
+ (i1 (NOT_Ps (CMPGTUrr IntRegs:$src2, IntRegs:$src1)))>;
+
+// Map from Rs >= Rt -> !(Rt > Rs).
+// rs >= rt -> !(rt > rs).
+def : Pat <(i1 (setuge DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (NOT_Ps (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1)))>;
+
+// Map from cmpleu(Rs, Rt) -> !cmpgtu(Rs, Rt).
+// Map from (Rs <= Rt) -> !(Rs > Rt).
+def : Pat <(i1 (setule IntRegs:$src1, IntRegs:$src2)),
+ (i1 (NOT_Ps (CMPGTUrr IntRegs:$src1, IntRegs:$src2)))>;
+
+// Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt).
+// Map from (Rs <= Rt) -> !(Rs > Rt).
+def : Pat <(i1 (setule DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (NOT_Ps (CMPGTU64rr DoubleRegs:$src1, DoubleRegs:$src2)))>;
+
+// Sign extends.
+// i1 -> i32
+def : Pat <(i32 (sext PredRegs:$src1)),
+ (i32 (MUX_ii PredRegs:$src1, -1, 0))>;
+
+// Convert sign-extended load back to load and sign extend.
+// i8 -> i64
+def: Pat <(i64 (sextloadi8 ADDRriS11_0:$src1)),
+ (i64 (SXTW (LDrib ADDRriS11_0:$src1)))>;
+
+// Convert any-extended load back to load and sign extend.
+// i8 -> i64
+def: Pat <(i64 (extloadi8 ADDRriS11_0:$src1)),
+ (i64 (SXTW (LDrib ADDRriS11_0:$src1)))>;
+
+// Convert sign-extended load back to load and sign extend.
+// i16 -> i64
+def: Pat <(i64 (sextloadi16 ADDRriS11_1:$src1)),
+ (i64 (SXTW (LDrih ADDRriS11_1:$src1)))>;
+
+// Convert sign-extended load back to load and sign extend.
+// i32 -> i64
+def: Pat <(i64 (sextloadi32 ADDRriS11_2:$src1)),
+ (i64 (SXTW (LDriw ADDRriS11_2:$src1)))>;
+
+
+// Zero extends.
+// i1 -> i32
+def : Pat <(i32 (zext PredRegs:$src1)),
+ (i32 (MUX_ii PredRegs:$src1, 1, 0))>;
+
+// i1 -> i64
+def : Pat <(i64 (zext PredRegs:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (MUX_ii PredRegs:$src1, 1, 0)))>;
+
+// i32 -> i64
+def : Pat <(i64 (zext IntRegs:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), IntRegs:$src1))>;
+
+// i8 -> i64
+def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>;
+
+// i16 -> i64
+def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>;
+
+// i32 -> i64
+def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>;
+
+def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)),
+ (i32 (LDriw ADDRriS11_0:$src1))>;
+
+// Rs = zext(Pd) is already mapped to Rs = mux(Pd, #1, #0) by the "i1 -> i32"
+// pattern in the "Zero extends" group above; the identical pattern that used
+// to be repeated here was redundant and has been removed.
+
+// Map from Rs = Pd to Rs = mux(Pd, #1, #0)
+def : Pat <(i32 (anyext PredRegs:$src1)),
+ (i32 (MUX_ii PredRegs:$src1, 1, 0))>;
+
+// Map from Rss = Pd to Rdd = sxtw (mux(Pd, #1, #0))
+def : Pat <(i64 (anyext PredRegs:$src1)),
+ (i64 (SXTW (i32 (MUX_ii PredRegs:$src1, 1, 0))))>;
+
+
+// Any extended 64-bit load.
+// anyext i32 -> i64
+def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>;
+
+// anyext i16 -> i64.
+def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>;
+
+// Rdd = zxtw(Rs) is already mapped to Rdd = combine(0, Rs) by the
+// "i32 -> i64" pattern in the "Zero extends" group above; the identical
+// pattern that used to be repeated here was redundant and has been removed.
+
+// Multiply 64-bit unsigned and use upper result.
+// Decomposition used below, writing $src1 = aH:aL and $src2 = bH:bL as
+// 32-bit halves, and assuming MPYU64_acc(acc, x, y) computes acc + x * y
+// (TODO confirm against the instruction definition):
+//   t0  = low word of ((aL * bL) >> 32), widened with COMBINE_rr (TFRI 0)
+//   t1  = t0 + aH * bL + aL * bH
+//   res = (low word of (t1 >> 32), widened the same way) + aH * bH
+def : Pat <(mulhu DoubleRegs:$src1, DoubleRegs:$src2),
+ (MPYU64_acc(COMBINE_rr (TFRI 0),
+ (EXTRACT_SUBREG
+ (LSRd_ri(MPYU64_acc(MPYU64_acc(COMBINE_rr (TFRI 0),
+ (EXTRACT_SUBREG (LSRd_ri(MPYU64
+ (EXTRACT_SUBREG DoubleRegs:$src1,
+ subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2,
+ subreg_loreg)),
+ 32) ,subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1,
+ subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src2,
+ subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)),
+ 32),subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)
+ )>;
+
+// Multiply 64-bit signed and use upper result.
+// Same operand structure as the mulhu pattern, with MPY64_acc in place of
+// MPYU64_acc for the aH*bL, aL*bH and aH*bH terms. The aL*bL product still
+// uses the unsigned MPYU64, and both intermediate shifts are logical
+// (LSRd_ri) with the result widened through COMBINE_rr (TFRI 0).
+def : Pat <(mulhs DoubleRegs:$src1, DoubleRegs:$src2),
+ (MPY64_acc(COMBINE_rr (TFRI 0),
+ (EXTRACT_SUBREG
+ (LSRd_ri(MPY64_acc(MPY64_acc(COMBINE_rr (TFRI 0),
+ (EXTRACT_SUBREG (LSRd_ri(MPYU64
+ (EXTRACT_SUBREG DoubleRegs:$src1,
+ subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2,
+ subreg_loreg)),
+ 32) ,subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1,
+ subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src2,
+ subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)),
+ 32),subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)
+ )>;
+
+// Hexagon specific ISD nodes.
+// ADJDYNALLOC node: one result and two operands; the result is constrained
+// to have the same type as the first operand (SDTCisSameAs<0, 1>).
+def SDTHexagonADJDYNALLOC : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>]>;
+def Hexagon_ADJDYNALLOC : SDNode<"HexagonISD::ADJDYNALLOC",
+ SDTHexagonADJDYNALLOC>;
+// Needed to tag these instructions for stack layout.
+// Printed as a register + signed 16-bit immediate add. usesCustomInserter is
+// set, so the instruction goes through the target's custom-inserter hook.
+let usesCustomInserter = 1 in
+def ADJDYNALLOC : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1,
+ s16Imm:$src2),
+ "$dst = add($src1, #$src2)",
+ [(set IntRegs:$dst, (Hexagon_ADJDYNALLOC IntRegs:$src1,
+ s16ImmPred:$src2))]>;
+
+// ARGEXTEND node: one result, one operand, no type constraints. The
+// instruction selected from it is printed as a plain register copy.
+def SDTHexagonARGEXTEND : SDTypeProfile<1, 1, []>;
+def Hexagon_ARGEXTEND : SDNode<"HexagonISD::ARGEXTEND", SDTHexagonARGEXTEND>;
+def ARGEXTEND : ALU32_rr <(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = $src1",
+ [(set IntRegs:$dst, (Hexagon_ARGEXTEND IntRegs:$src1))]>;
+
+// A 16-bit sign_extend_inreg applied to an ARGEXTEND result is selected as a
+// plain register transfer (TFR) of the ARGEXTEND input, i.e. no extension
+// instruction is emitted for it.
+let AddedComplexity = 100 in
+def : Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND IntRegs:$src1), i16)),
+ (TFR IntRegs:$src1)>;
+
+
+// Jump-table branch node: no results, one i32 operand, chained.
+def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>;
+
+// Indirect jump through a register (printed as jumpr), marked as an indirect
+// terminating branch and as a barrier.
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
+def BR_JT : JRInst<(outs), (ins IntRegs:$src),
+ "jumpr $src",
+ [(HexagonBR_JT IntRegs:$src)]>;
+// Wrapper node around a jump-table address (unary integer op profile).
+def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>;
+
+// A wrapped jump-table address is materialized with CONST32_set_jt.
+def : Pat<(HexagonWrapperJT tjumptable:$dst),
+ (CONST32_set_jt tjumptable:$dst)>;
+
+
+//===----------------------------------------------------------------------===//
+// V3 Instructions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrInfoV3.td"
+
+//===----------------------------------------------------------------------===//
+// V3 Instructions -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// V4 Instructions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrInfoV4.td"
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV3.td b/lib/Target/Hexagon/HexagonInstrInfoV3.td
new file mode 100644
index 0000000..a73897e
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfoV3.td
@@ -0,0 +1,134 @@
+//=- HexagonInstrInfoV3.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V3 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// J +
+//===----------------------------------------------------------------------===//
+// Call subroutine.
+let isCall = 1, neverHasSideEffects = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31,
+ P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def CALLv3 : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "call $dst", []>, Requires<[HasV3T]>;
+}
+
+//===----------------------------------------------------------------------===//
+// J -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// JR +
+//===----------------------------------------------------------------------===//
+// Call subroutine from register.
+let isCall = 1, neverHasSideEffects = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31,
+ P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def CALLRv3 : JRInst<(outs), (ins IntRegs:$dst, variable_ops),
+ "callr $dst",
+ []>, Requires<[HasV3TOnly]>;
+ }
+
+
+// if(p?.new) jumpr:t r?
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cPnewt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) jumpr:t $src2",
+ []>, Requires<[HasV3T]>;
+}
+
+// if (!p?.new) jumpr:t r?
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cNotPnewt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) jumpr:t $src2",
+ []>, Requires<[HasV3T]>;
+}
+
+// Not taken.
+// if(p?.new) jumpr:nt r?
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cPnewNt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) jumpr:nt $src2",
+ []>, Requires<[HasV3T]>;
+}
+
+// if (!p?.new) jumpr:nt r?
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cNotPnewNt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) jumpr:nt $src2",
+ []>, Requires<[HasV3T]>;
+}
+
+//===----------------------------------------------------------------------===//
+// JR -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/ALU +
+//===----------------------------------------------------------------------===//
+
+let AddedComplexity = 200 in
+def MAXw_dd : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = max($src2, $src1)",
+ [(set DoubleRegs:$dst, (select (i1 (setlt DoubleRegs:$src2,
+ DoubleRegs:$src1)),
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))]>,
+Requires<[HasV3T]>;
+
+let AddedComplexity = 200 in
+def MINw_dd : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = min($src2, $src1)",
+ [(set DoubleRegs:$dst, (select (i1 (setgt DoubleRegs:$src2,
+ DoubleRegs:$src1)),
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))]>,
+Requires<[HasV3T]>;
+
+//===----------------------------------------------------------------------===//
+// ALU64/ALU -
+//===----------------------------------------------------------------------===//
+
+
+
+
+//def : Pat <(brcond (i1 (seteq IntRegs:$src1, 0)), bb:$offset),
+// (JMP_RegEzt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>;
+
+//def : Pat <(brcond (i1 (setne IntRegs:$src1, 0)), bb:$offset),
+// (JMP_RegNzt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>;
+
+//def : Pat <(brcond (i1 (setle IntRegs:$src1, 0)), bb:$offset),
+// (JMP_RegLezt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>;
+
+//def : Pat <(brcond (i1 (setge IntRegs:$src1, 0)), bb:$offset),
+// (JMP_RegGezt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>;
+
+//def : Pat <(brcond (i1 (setgt IntRegs:$src1, -1)), bb:$offset),
+// (JMP_RegGezt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>;
+
+
+// Map call instruction
+def : Pat<(call IntRegs:$dst),
+ (CALLRv3 IntRegs:$dst)>, Requires<[HasV3T]>;
+def : Pat<(call tglobaladdr:$dst),
+ (CALLv3 tglobaladdr:$dst)>, Requires<[HasV3T]>;
+def : Pat<(call texternalsym:$dst),
+ (CALLv3 texternalsym:$dst)>, Requires<[HasV3T]>;
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
new file mode 100644
index 0000000..24218d0
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -0,0 +1,3392 @@
+//=- HexagonInstrInfoV4.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V4 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+// Hexagon V4 Architecture spec defines 10 instruction classes:
+// LD ST ALU32 XTYPE J JR MEMOP NV CR SYSTEM (SYSTEM is not implemented in the
+// compiler)
+
+// LD Instructions:
+// ========================================
+// Loads (8/16/32/64 bit)
+// Deallocframe
+
+// ST Instructions:
+// ========================================
+// Stores (8/16/32/64 bit)
+// Allocframe
+
+// ALU32 Instructions:
+// ========================================
+// Arithmetic / Logical (32 bit)
+// Vector Halfword
+
+// XTYPE Instructions (32/64 bit):
+// ========================================
+// Arithmetic, Logical, Bit Manipulation
+// Multiply (Integer, Fractional, Complex)
+// Permute / Vector Permute Operations
+// Predicate Operations
+// Shift / Shift with Add/Sub/Logical
+// Vector Byte ALU
+// Vector Halfword (ALU, Shift, Multiply)
+// Vector Word (ALU, Shift)
+
+// J Instructions:
+// ========================================
+// Jump/Call PC-relative
+
+// JR Instructions:
+// ========================================
+// Jump/Call Register
+
+// MEMOP Instructions:
+// ========================================
+// Operation on memory (8/16/32 bit)
+
+// NV Instructions:
+// ========================================
+// New-value Jumps
+// New-value Stores
+
+// CR Instructions:
+// ========================================
+// Control-Register Transfers
+// Hardware Loop Setup
+// Predicate Logicals & Reductions
+
+// SYSTEM Instructions (not implemented in the compiler):
+// ========================================
+// Prefetch
+// Cache Maintenance
+// Bus Operations
+
+
+//===----------------------------------------------------------------------===//
+// ALU32 +
+//===----------------------------------------------------------------------===//
+
+// Shift halfword.
+
+def ASLH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = aslh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def ASLH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = aslh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def ASLH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = aslh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def ASLH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = aslh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def ASRH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = asrh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def ASRH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = asrh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def ASRH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = asrh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def ASRH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = asrh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Sign extend.
+
+def SXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = sxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def SXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = sxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def SXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = sxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def SXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = sxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+def SXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = sxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def SXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = sxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def SXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = sxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def SXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = sxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Zero extend.
+
+let neverHasSideEffects = 1 in
+def ZXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = zxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = zxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = zxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = zxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = zxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = zxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = zxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = zxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+//===----------------------------------------------------------------------===//
+// ALU32 -
+//===----------------------------------------------------------------------===//
+
+
+
+//===----------------------------------------------------------------------===//
+// LD +
+//===----------------------------------------------------------------------===//
+///
+/// Make sure that in post increment load, the first operand is always the post
+/// increment operand.
+///
+//// Load doubleword.
+// Rdd=memd(Re=#U6)
+
+// Rdd=memd(Rs+Rt<<#u2)
+// Special case pattern for indexed load without offset which is easier to
+// match. AddedComplexity of this pattern should be lower than base+offset load
+// and lower yet than the more generic version with offset/shift below.
+// Similar approach is taken for all other base+index loads.
+let AddedComplexity = 10, isPredicable = 1 in
+def LDrid_indexed_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memd($src1+$src2<<#0)",
+ [(set DoubleRegs:$dst, (load (add IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDrid_indexed_shl_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memd($src1+$src2<<#$offset)",
+ [(set DoubleRegs:$dst, (load (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$offset))))]>,
+ Requires<[HasV4T]>;
+
+//// Load doubleword conditionally.
+// if ([!]Pv[.new]) Rdd=memd(Rs+Rt<<#u2)
+// if (Pv) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrid_indexed_cPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memd($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrid_indexed_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memd($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrid_indexed_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memd($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrid_indexed_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memd($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrid_indexed_shl_cPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memd($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrid_indexed_shl_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memd($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrid_indexed_shl_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memd($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrid_indexed_shl_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memd($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rdd=memd(Rt<<#u2+#U6)
+
+//// Load byte.
+// Rd=memb(Re=#U6)
+
+// Rd=memb(Rs+Rt<<#u2)
+let AddedComplexity = 10, isPredicable = 1 in
+def LDrib_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memb($src1+$src2<<#0)",
+ [(set IntRegs:$dst, (sextloadi8 (add IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def LDriub_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memub($src1+$src2<<#0)",
+ [(set IntRegs:$dst, (zextloadi8 (add IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def LDriub_ae_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memub($src1+$src2<<#0)",
+ [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDrib_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memb($src1+$src2<<#$offset)",
+ [(set IntRegs:$dst,
+ (sextloadi8 (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$offset))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDriub_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memub($src1+$src2<<#$offset)",
+ [(set IntRegs:$dst,
+ (zextloadi8 (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$offset))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDriub_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memub($src1+$src2<<#$offset)",
+ [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$offset))))]>,
+ Requires<[HasV4T]>;
+
+//// Load byte conditionally.
+// if ([!]Pv[.new]) Rd=memb(Rs+Rt<<#u2)
+// if (Pv) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrib_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memb($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrib_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memb($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrib_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memb($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrib_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memb($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrib_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memb($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrib_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memb($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrib_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memb($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrib_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memb($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+//// Load unsigned byte conditionally.
+// if ([!]Pv[.new]) Rd=memub(Rs+Rt<<#u2)
+// if (Pv) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriub_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memub($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriub_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memub($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriub_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memub($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriub_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memub($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriub_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memub($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriub_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memub($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriub_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memub($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriub_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memub($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memb(Rt<<#u2+#U6)
+
+//// Load halfword
+// Rd=memh(Re=#U6)
+
+// Rd=memh(Rs+Rt<<#u2)
+let AddedComplexity = 10, isPredicable = 1 in
+def LDrih_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memh($src1+$src2<<#0)",
+ [(set IntRegs:$dst, (sextloadi16 (add IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def LDriuh_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memuh($src1+$src2<<#0)",
+ [(set IntRegs:$dst, (zextloadi16 (add IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def LDriuh_ae_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memuh($src1+$src2<<#0)",
+ [(set IntRegs:$dst, (extloadi16 (add IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+// Rd=memh(Rs+Rt<<#u2)
+let AddedComplexity = 40, isPredicable = 1 in
+def LDrih_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memh($src1+$src2<<#$offset)",
+ [(set IntRegs:$dst,
+ (sextloadi16 (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$offset))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDriuh_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memuh($src1+$src2<<#$offset)",
+ [(set IntRegs:$dst,
+ (zextloadi16 (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$offset))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDriuh_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memuh($src1+$src2<<#$offset)",
+ [(set IntRegs:$dst,
+ (extloadi16 (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$offset))))]>,
+ Requires<[HasV4T]>;
+
+//// Load halfword conditionally.
+// if ([!]Pv[.new]) Rd=memh(Rs+Rt<<#u2)
+// if (Pv) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrih_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrih_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrih_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrih_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrih_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrih_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrih_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrih_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+//// Load unsigned halfword conditionally.
+// if ([!]Pv[.new]) Rd=memuh(Rs+Rt<<#u2)
+// if (Pv) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriuh_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memuh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriuh_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memuh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriuh_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memuh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriuh_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memuh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriuh_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memuh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriuh_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memuh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriuh_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memuh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriuh_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memuh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memh(Rt<<#u2+#U6)
+
+//// Load word.
+// Rd=memw(Re=#U6)
+
+// Rd=memw(Rs+Rt<<#u2)
+let AddedComplexity = 10, isPredicable = 1 in
+def LDriw_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memw($src1+$src2<<#0)",
+ [(set IntRegs:$dst, (load (add IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+// Rd=memw(Rs+Rt<<#u2)
+let AddedComplexity = 40, isPredicable = 1 in
+def LDriw_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memw($src1+$src2<<#$offset)",
+ [(set IntRegs:$dst, (load (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$offset))))]>,
+ Requires<[HasV4T]>;
+
+//// Load word conditionally.
+// if ([!]Pv[.new]) Rd=memw(Rs+Rt<<#u2)
+// if (Pv) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriw_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memw($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriw_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memw($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriw_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memw($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriw_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memw($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriw_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memw($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriw_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memw($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriw_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memw($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriw_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memw($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memw(Rt<<#u2+#U6)
+
+
+// Post-inc Load, Predicated, Dot new
+
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrid_cdnPt_V4 : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
+ "if ($src1.new) $dst1 = memd($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrid_cdnNotPt_V4 : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
+ "if (!$src1.new) $dst1 = memd($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrib_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if ($src1.new) $dst1 = memb($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrib_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if (!$src1.new) $dst1 = memb($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrih_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if ($src1.new) $dst1 = memh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrih_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if (!$src1.new) $dst1 = memh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriub_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if ($src1.new) $dst1 = memub($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriub_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if (!$src1.new) $dst1 = memub($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriuh_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if ($src1.new) $dst1 = memuh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriuh_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if (!$src1.new) $dst1 = memuh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriw_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
+ "if ($src1.new) $dst1 = memw($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriw_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
+ "if (!$src1.new) $dst1 = memw($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+
+//===----------------------------------------------------------------------===//
+// LD -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ST +
+//===----------------------------------------------------------------------===//
+///
+/// Assumptions::: ****** DO NOT IGNORE ********
+/// 1. Make sure that in post increment store, the zero'th operand is always the
+/// post increment operand.
+/// 2. Make sure that the store value operand(Rt/Rtt) in a store is always the
+/// last operand.
+///
+
+// Store doubleword.
+// memd(Re=#U6)=Rtt
+// TODO: needs to be implemented
+
+// memd(Rs+#s11:3)=Rtt
+// memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, isPredicable = 1 in
+def STrid_indexed_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, DoubleRegs:$src4),
+ "memd($src1+$src2<<#$src3) = $src4",
+ [(store DoubleRegs:$src4, (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// memd(Ru<<#u2+#U6)=Rtt
+// Store doubleword to the address (Ru << #u2) + #U6. The selection pattern
+// mirrors the assembly string: the register is shifted first and the
+// immediate offset is then added to the shifted value.
+let AddedComplexity = 10 in
+def STrid_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, DoubleRegs:$src4),
+ "memd($src1<<#$src2+#$src3) = $src4",
+ [(store DoubleRegs:$src4, (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ u6ImmPred:$src3))]>,
+ Requires<[HasV4T]>;
+
+// memd(Rx++#s4:3)=Rtt
+// memd(Rx++#s4:3:circ(Mu))=Rtt
+// memd(Rx++I:circ(Mu))=Rtt
+// memd(Rx++Mu)=Rtt
+// memd(Rx++Mu:brev)=Rtt
+// memd(gp+#u16:3)=Rtt
+
+// Store doubleword conditionally.
+// if ([!]Pv[.new]) memd(#u6)=Rtt
+// TODO: needs to be implemented.
+
+// if ([!]Pv[.new]) memd(Rs+#u6:3)=Rtt
+// if (Pv) memd(Rs+#u6:3)=Rtt
+// if (Pv.new) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
+ "if ($src1.new) memd($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memd(Rs+#u6:3)=Rtt
+// if (!Pv.new) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
+ "if (!$src1.new) memd($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memd(Rs+#u6:3)=Rtt
+// if (Pv.new) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
+ DoubleRegs:$src4),
+ "if ($src1.new) memd($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memd(Rs+#u6:3)=Rtt
+// if (!Pv.new) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
+ DoubleRegs:$src4),
+ "if (!$src1.new) memd($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memd(Rs+Ru<<#u2)=Rtt
+// if (Pv) memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_shl_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ DoubleRegs:$src5),
+ "if ($src1) memd($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memd(Rs+Ru<<#u2)=Rtt
+// Dot-new predicated form: the assembly string must print the predicate as
+// "$src1.new" so it differs from STrid_indexed_shl_cPt_V4, which prints the
+// plain "$src1" form.
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_shl_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ DoubleRegs:$src5),
+ "if ($src1.new) memd($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+// if (!Pv) memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_shl_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ DoubleRegs:$src5),
+ "if (!$src1) memd($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+// if (!Pv.new) memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_shl_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ DoubleRegs:$src5),
+ "if (!$src1.new) memd($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memd(Rx++#s4:3)=Rtt
+// if (Pv) memd(Rx++#s4:3)=Rtt
+// if (Pv.new) memd(Rx++#s4:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def POST_STdri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
+ s4_3Imm:$offset),
+ "if ($src1.new) memd($src3++#$offset) = $src2",
+ [],
+ "$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memd(Rx++#s4:3)=Rtt
+// if (!Pv.new) memd(Rx++#s4:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def POST_STdri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
+ s4_3Imm:$offset),
+ "if (!$src1.new) memd($src3++#$offset) = $src2",
+ [],
+ "$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Store byte.
+// memb(Re=#U6)=Rt
+// TODO: needs to be implemented.
+// memb(Rs+#s11:0)=Rt
+// memb(Rs+#u6:0)=#S8
+let AddedComplexity = 10, isPredicable = 1 in
+def STrib_imm_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u6_0Imm:$src2, s8Imm:$src3),
+ "memb($src1+#$src2) = #$src3",
+ [(truncstorei8 s8ImmPred:$src3, (add IntRegs:$src1,
+ u6_0ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// memb(Rs+Ru<<#u2)=Rt
+let AddedComplexity = 10, isPredicable = 1 in
+def STrib_indexed_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memb($src1+$src2<<#$src3) = $src4",
+ [(truncstorei8 IntRegs:$src4, (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// memb(Ru<<#u2+#U6)=Rt
+// Truncating byte store to the address (Ru << #u2) + #U6. The selection
+// pattern mirrors the assembly string: shift the register first, then add
+// the immediate offset.
+let AddedComplexity = 10 in
+def STrib_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memb($src1<<#$src2+#$src3) = $src4",
+ [(truncstorei8 IntRegs:$src4, (add (shl IntRegs:$src1,
+ u2ImmPred:$src2),
+ u6ImmPred:$src3))]>,
+ Requires<[HasV4T]>;
+
+// memb(Rx++#s4:0:circ(Mu))=Rt
+// memb(Rx++I:circ(Mu))=Rt
+// memb(Rx++Mu)=Rt
+// memb(Rx++Mu:brev)=Rt
+// memb(gp+#u16:0)=Rt
+
+
+// Store byte conditionally.
+// if ([!]Pv[.new]) memb(#u6)=Rt
+// if ([!]Pv[.new]) memb(Rs+#u6:0)=#S6
+// if (Pv) memb(Rs+#u6:0)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_imm_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
+ "if ($src1) memb($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rs+#u6:0)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_imm_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
+ "if ($src1.new) memb($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+#u6:0)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_imm_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
+ "if (!$src1) memb($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+#u6:0)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_imm_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
+ "if (!$src1.new) memb($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memb(Rs+#u6:0)=Rt
+// if (Pv) memb(Rs+#u6:0)=Rt
+// if (Pv.new) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memb($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+#u6:0)=Rt
+// if (!Pv.new) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memb($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memb(Rs+#u6:0)=Rt
+// if (!Pv) memb(Rs+#u6:0)=Rt
+// if (Pv.new) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memb($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memb($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memb(Rs+Ru<<#u2)=Rt
+// if (Pv) memb(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_indexed_shl_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memb($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_indexed_shl_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memb($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_indexed_shl_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memb($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_indexed_shl_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memb($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memb(Rx++#s4:0)=Rt
+// if (Pv) memb(Rx++#s4:0)=Rt
+// if (Pv.new) memb(Rx++#s4:0)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STbri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if ($src1.new) memb($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rx++#s4:0)=Rt
+// if (!Pv.new) memb(Rx++#s4:0)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STbri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if (!$src1.new) memb($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Store halfword.
+// memh(Re=#U6)=Rt.H
+// TODO: needs to be implemented
+
+// memh(Re=#U6)=Rt
+// TODO: needs to be implemented
+
+// memh(Rs+#s11:1)=Rt.H
+// memh(Rs+#s11:1)=Rt
+// memh(Rs+#u6:1)=#S8
+let AddedComplexity = 10, isPredicable = 1 in
+def STrih_imm_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u6_1Imm:$src2, s8Imm:$src3),
+ "memh($src1+#$src2) = #$src3",
+ [(truncstorei16 s8ImmPred:$src3, (add IntRegs:$src1,
+ u6_1ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// memh(Rs+Ru<<#u2)=Rt.H
+// TODO: needs to be implemented.
+
+// memh(Rs+Ru<<#u2)=Rt
+let AddedComplexity = 10, isPredicable = 1 in
+def STrih_indexed_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memh($src1+$src2<<#$src3) = $src4",
+ [(truncstorei16 IntRegs:$src4, (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// memh(Ru<<#u2+#U6)=Rt.H
+// memh(Ru<<#u2+#U6)=Rt
+// Truncating halfword store to the address (Ru << #u2) + #U6. The selection
+// pattern mirrors the assembly string: shift the register first, then add
+// the immediate offset.
+let AddedComplexity = 10 in
+def STrih_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memh($src1<<#$src2+#$src3) = $src4",
+ [(truncstorei16 IntRegs:$src4, (add (shl IntRegs:$src1,
+ u2ImmPred:$src2),
+ u6ImmPred:$src3))]>,
+ Requires<[HasV4T]>;
+
+// memh(Rx++#s4:1:circ(Mu))=Rt.H
+// memh(Rx++#s4:1:circ(Mu))=Rt
+// memh(Rx++I:circ(Mu))=Rt.H
+// memh(Rx++I:circ(Mu))=Rt
+// memh(Rx++Mu)=Rt.H
+// memh(Rx++Mu)=Rt
+// memh(Rx++Mu:brev)=Rt.H
+// memh(Rx++Mu:brev)=Rt
+// memh(gp+#u16:1)=Rt.H
+// memh(gp+#u16:1)=Rt
+
+
+// Store halfword conditionally.
+// if ([!]Pv[.new]) memh(#u6)=Rt.H
+// if ([!]Pv[.new]) memh(#u6)=Rt
+
+// if ([!]Pv[.new]) memh(Rs+#u6:1)=#S6
+// if (Pv) memh(Rs+#u6:1)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_imm_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
+ "if ($src1) memh($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+#u6:1)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_imm_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
+ "if ($src1.new) memh($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+#u6:1)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_imm_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
+ "if (!$src1) memh($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+#u6:1)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_imm_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
+ "if (!$src1.new) memh($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt.H
+// TODO: needs to be implemented.
+
+// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt
+// if (Pv) memh(Rs+#u6:1)=Rt
+// if (Pv.new) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memh($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+#u6:1)=Rt
+// if (!Pv.new) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memh($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memh($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memh($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Rt.H
+// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Rt
+// if (Pv) memh(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memh($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+Ru<<#u2)=Rt
+// Uses the same mayStore/AddedComplexity settings as the cPt, cNotPt and
+// cdnNotPt variants of STrih_indexed_shl. The pattern list is empty, so
+// mayStore is stated explicitly here.
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memh($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memh($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memh($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt.H
+// TODO: Needs to be implemented.
+
+// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt
+// if (Pv) memh(Rx++#s4:1)=Rt
+// if (Pv.new) memh(Rx++#s4:1)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if ($src1.new) memh($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rx++#s4:1)=Rt
+// if (!Pv.new) memh(Rx++#s4:1)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if (!$src1.new) memh($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Store word.
+// memw(Re=#U6)=Rt
+// TODO: Needs to be implemented.
+
+// memw(Rs+#s11:2)=Rt
+// memw(Rs+#u6:2)=#S8
+let AddedComplexity = 10, isPredicable = 1 in
+def STriw_imm_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u6_2Imm:$src2, s8Imm:$src3),
+ "memw($src1+#$src2) = #$src3",
+ [(store s8ImmPred:$src3, (add IntRegs:$src1, u6_2ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// memw(Rs+Ru<<#u2)=Rt
+let AddedComplexity = 10, isPredicable = 1 in
+def STriw_indexed_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memw($src1+$src2<<#$src3) = $src4",
+ [(store IntRegs:$src4, (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// memw(Ru<<#u2+#U6)=Rt
+// Store word to the address (Ru << #u2) + #U6. The selection pattern mirrors
+// the assembly string: shift the register first, then add the immediate
+// offset.
+let AddedComplexity = 10 in
+def STriw_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memw($src1<<#$src2+#$src3) = $src4",
+ [(store IntRegs:$src4, (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ u6ImmPred:$src3))]>,
+ Requires<[HasV4T]>;
+
+// memw(Rx++#s4:2)=Rt
+// memw(Rx++#s4:2:circ(Mu))=Rt
+// memw(Rx++I:circ(Mu))=Rt
+// memw(Rx++Mu)=Rt
+// memw(Rx++Mu:brev)=Rt
+// memw(gp+#u16:2)=Rt
+
+
+// Store word conditionally.
+// if ([!]Pv[.new]) memw(#u6)=Rt
+// TODO: Needs to be implemented.
+
+// if ([!]Pv[.new]) memw(Rs+#u6:2)=#S6
+// if (Pv) memw(Rs+#u6:2)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_imm_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
+ "if ($src1) memw($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(Rs+#u6:2)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_imm_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
+ "if ($src1.new) memw($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rs+#u6:2)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_imm_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
+ "if (!$src1) memw($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rs+#u6:2)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_imm_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
+ "if (!$src1.new) memw($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memw(Rs+#u6:2)=Rt
+// if (Pv) memw(Rs+#u6:2)=Rt
+// if (Pv.new) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memw($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rs+#u6:2)=Rt
+// if (!Pv.new) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memw($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memw(Rs+#u6:2)=Rt
+// if (!Pv) memw(Rs+#u6:2)=Rt
+// if (Pv.new) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memw($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memw($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memw(Rs+Ru<<#u2)=Rt
+// if (Pv) memw(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_indexed_shl_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memw($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_indexed_shl_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memw($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_indexed_shl_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memw($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_indexed_shl_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memw($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memw(Rx++#s4:2)=Rt
+// if (Pv) memw(Rx++#s4:2)=Rt
+// if (Pv.new) memw(Rx++#s4:2)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STwri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if ($src1.new) memw($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rx++#s4:2)=Rt
+// if (!Pv.new) memw(Rx++#s4:2)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STwri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if (!$src1.new) memw($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+//===----------------------------------------------------------------------===//
+// ST -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// NV/ST +
+//===----------------------------------------------------------------------===//
+
+// Store new-value byte.
+
+// memb(Re=#U6)=Nt.new
+// memb(Rs+#s11:0)=Nt.new
+let mayStore = 1, isPredicable = 1 in
+def STrib_nv_V4 : NVInst_V4<(outs), (ins MEMri:$addr, IntRegs:$src1),
+ "memb($addr) = $src1.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, isPredicable = 1 in
+def STrib_indexed_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, s11_0Imm:$src2, IntRegs:$src3),
+ "memb($src1+#$src2) = $src3.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in
+def STrib_indexed_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memb($src1+$src2<<#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(Ru<<#u2+#U6)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memb($src1<<#$src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(Rx++#s4:0)=Nt.new
+let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in
+def POST_STbri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s4_0Imm:$offset),
+ "memb($src2++#$offset) = $src1.new",
+ [],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// memb(Rx++#s4:0:circ(Mu))=Nt.new
+// memb(Rx++I:circ(Mu))=Nt.new
+// memb(Rx++Mu)=Nt.new
+// memb(Rx++Mu:brev)=Nt.new
+
+// memb(gp+#u16:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_GP_nv_V4 : NVInst_V4<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memb(#$global+$offset) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// Store new-value byte conditionally.
+// if ([!]Pv[.new]) memb(#u6)=Nt.new
+// if (Pv) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memb($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memb($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memb($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memb($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if ($src1) memb($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memb($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memb($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memb($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// if ([!]Pv[.new]) memb(Rs+Ru<<#u2)=Nt.new
+// if (Pv) memb(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memb($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memb($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memb($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memb($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memb(Rx++#s4:0)=Nt.new
+// if (Pv) memb(Rx++#s4:0)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STbri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if ($src1) memb($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rx++#s4:0)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STbri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if ($src1.new) memb($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rx++#s4:0)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STbri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if (!$src1) memb($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rx++#s4:0)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STbri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if (!$src1.new) memb($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Store new-value halfword.
+// memh(Re=#U6)=Nt.new
+// memh(Rs+#s11:1)=Nt.new
+// Halfword new-value store whose address is the single MEMri operand $addr.
+let mayStore = 1, isPredicable = 1 in
+def STrih_nv_V4
+  : NVInst_V4<(outs), (ins MEMri:$addr, IntRegs:$src1),
+      "memh($addr) = $src1.new", []>,
+    Requires<[HasV4T]>;
+
+// Same store with the base register and s11_1Imm offset as separate operands.
+let mayStore = 1, isPredicable = 1 in
+def STrih_indexed_nv_V4
+  : NVInst_V4<(outs), (ins IntRegs:$src1, s11_1Imm:$src2, IntRegs:$src3),
+      "memh($src1+#$src2) = $src3.new", []>,
+    Requires<[HasV4T]>;
+
+// Base register plus shifted index register: memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in
+def STrih_indexed_shl_nv_V4
+  : NVInst_V4<(outs),
+      (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+      "memh($src1+$src2<<#$src3) = $src4.new", []>,
+    Requires<[HasV4T]>;
+
+// Shifted register plus immediate: memh(Ru<<#u2+#U6)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_shl_nv_V4
+  : NVInst_V4<(outs),
+      (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+      "memh($src1<<#$src2+#$src3) = $src4.new", []>,
+    Requires<[HasV4T]>;
+
+// Post-increment form: memh(Rx++#s4:1)=Nt.new
+// The base register $src2 is tied to the result $dst.
+let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in
+def POST_SThri_nv_V4
+  : NVInstPI_V4<(outs IntRegs:$dst),
+      (ins IntRegs:$src1, IntRegs:$src2, s4_1Imm:$offset),
+      "memh($src2++#$offset) = $src1.new", [], "$src2 = $dst">,
+    Requires<[HasV4T]>;
+
+// memh(Rx++#s4:1:circ(Mu))=Nt.new
+// memh(Rx++I:circ(Mu))=Nt.new
+// memh(Rx++Mu)=Nt.new
+// memh(Rx++Mu:brev)=Nt.new
+
+// memh(gp+#u16:1)=Nt.new
+// Halfword new-value store addressed by a global symbol plus an offset.
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_GP_nv_V4
+  : NVInst_V4<(outs),
+      (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+      "memh(#$global+$offset) = $src.new", []>,
+    Requires<[HasV4T]>;
+
+
+// Store new-value halfword conditionally.
+
+// if ([!]Pv[.new]) memh(#u6)=Nt.new
+
+// if ([!]Pv[.new]) memh(Rs+#u6:1)=Nt.new
+// if (Pv) memh(Rs+#u6:1)=Nt.new
+// Predicated halfword new-value stores using a MEMri address operand.
+// True sense, current predicate value.
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cPt_nv_V4
+  : NVInst_V4<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+      "if ($src1) memh($addr) = $src2.new", []>,
+    Requires<[HasV4T]>;
+
+// True sense, predicate read as .new: if (Pv.new) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cdnPt_nv_V4
+  : NVInst_V4<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+      "if ($src1.new) memh($addr) = $src2.new", []>,
+    Requires<[HasV4T]>;
+
+// False sense: if (!Pv) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cNotPt_nv_V4
+  : NVInst_V4<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+      "if (!$src1) memh($addr) = $src2.new", []>,
+    Requires<[HasV4T]>;
+
+// False sense, predicate read as .new: if (!Pv.new) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cdnNotPt_nv_V4
+  : NVInst_V4<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+      "if (!$src1.new) memh($addr) = $src2.new", []>,
+    Requires<[HasV4T]>;
+
+// if (Pv) memh(Rs+#u6:1)=Nt.new
+// Predicated halfword new-value stores with the base register and u6_1Imm
+// offset as separate operands.  True sense, current predicate value.
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cPt_nv_V4
+  : NVInst_V4<(outs),
+      (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+      "if ($src1) memh($src2+#$src3) = $src4.new", []>,
+    Requires<[HasV4T]>;
+
+// True sense, predicate read as .new: if (Pv.new) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cdnPt_nv_V4
+  : NVInst_V4<(outs),
+      (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+      "if ($src1.new) memh($src2+#$src3) = $src4.new", []>,
+    Requires<[HasV4T]>;
+
+// False sense: if (!Pv) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cNotPt_nv_V4
+  : NVInst_V4<(outs),
+      (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+      "if (!$src1) memh($src2+#$src3) = $src4.new", []>,
+    Requires<[HasV4T]>;
+
+// False sense, predicate read as .new: if (!Pv.new) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cdnNotPt_nv_V4
+  : NVInst_V4<(outs),
+      (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+      "if (!$src1.new) memh($src2+#$src3) = $src4.new", []>,
+    Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Nt.new
+// if (Pv) memh(Rs+Ru<<#u2)=Nt.new
+// Predicated halfword new-value stores addressed by base register plus
+// shifted index register.  True sense, current predicate value.
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cPt_nv_V4
+  : NVInst_V4<(outs),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+           IntRegs:$src5),
+      "if ($src1) memh($src2+$src3<<#$src4) = $src5.new", []>,
+    Requires<[HasV4T]>;
+
+// True sense, predicate read as .new: if (Pv.new) memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cdnPt_nv_V4
+  : NVInst_V4<(outs),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+           IntRegs:$src5),
+      "if ($src1.new) memh($src2+$src3<<#$src4) = $src5.new", []>,
+    Requires<[HasV4T]>;
+
+// False sense: if (!Pv) memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cNotPt_nv_V4
+  : NVInst_V4<(outs),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+           IntRegs:$src5),
+      "if (!$src1) memh($src2+$src3<<#$src4) = $src5.new", []>,
+    Requires<[HasV4T]>;
+
+// False sense, predicate read as .new: if (!Pv.new) memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cdnNotPt_nv_V4
+  : NVInst_V4<(outs),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+           IntRegs:$src5),
+      "if (!$src1.new) memh($src2+$src3<<#$src4) = $src5.new", []>,
+    Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rx++#s4:1)=Nt.new
+// if (Pv) memh(Rx++#s4:1)=Nt.new
+// Predicated post-increment halfword stores of a new-value register.  In
+// every variant the base register $src3 is tied to the result $dst.
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cPt_nv_V4
+  : NVInstPI_V4<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+      "if ($src1) memh($src3++#$offset) = $src2.new", [], "$src3 = $dst">,
+    Requires<[HasV4T]>;
+
+// True sense, predicate read as .new: if (Pv.new) memh(Rx++#s4:1)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cdnPt_nv_V4
+  : NVInstPI_V4<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+      "if ($src1.new) memh($src3++#$offset) = $src2.new", [], "$src3 = $dst">,
+    Requires<[HasV4T]>;
+
+// False sense: if (!Pv) memh(Rx++#s4:1)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cNotPt_nv_V4
+  : NVInstPI_V4<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+      "if (!$src1) memh($src3++#$offset) = $src2.new", [], "$src3 = $dst">,
+    Requires<[HasV4T]>;
+
+// False sense, predicate read as .new: if (!Pv.new) memh(Rx++#s4:1)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cdnNotPt_nv_V4
+  : NVInstPI_V4<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+      "if (!$src1.new) memh($src3++#$offset) = $src2.new", [], "$src3 = $dst">,
+    Requires<[HasV4T]>;
+
+
+// Store new-value word.
+
+// memw(Re=#U6)=Nt.new
+// memw(Rs+#s11:2)=Nt.new
+// Word new-value store whose address is the single MEMri operand $addr.
+let mayStore = 1, isPredicable = 1 in
+def STriw_nv_V4
+  : NVInst_V4<(outs), (ins MEMri:$addr, IntRegs:$src1),
+      "memw($addr) = $src1.new", []>,
+    Requires<[HasV4T]>;
+
+// Same store with the base register and s11_2Imm offset as separate operands.
+let mayStore = 1, isPredicable = 1 in
+def STriw_indexed_nv_V4
+  : NVInst_V4<(outs), (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3),
+      "memw($src1+#$src2) = $src3.new", []>,
+    Requires<[HasV4T]>;
+
+// Base register plus shifted index register: memw(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in
+def STriw_indexed_shl_nv_V4
+  : NVInst_V4<(outs),
+      (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+      "memw($src1+$src2<<#$src3) = $src4.new", []>,
+    Requires<[HasV4T]>;
+
+// Shifted register plus immediate: memw(Ru<<#u2+#U6)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_shl_nv_V4
+  : NVInst_V4<(outs),
+      (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+      "memw($src1<<#$src2+#$src3) = $src4.new", []>,
+    Requires<[HasV4T]>;
+
+// Post-increment form: memw(Rx++#s4:2)=Nt.new
+// The base register $src2 is tied to the result $dst.
+let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in
+def POST_STwri_nv_V4
+  : NVInstPI_V4<(outs IntRegs:$dst),
+      (ins IntRegs:$src1, IntRegs:$src2, s4_2Imm:$offset),
+      "memw($src2++#$offset) = $src1.new", [], "$src2 = $dst">,
+    Requires<[HasV4T]>;
+
+// memw(Rx++#s4:2:circ(Mu))=Nt.new
+// memw(Rx++I:circ(Mu))=Nt.new
+// memw(Rx++Mu)=Nt.new
+// memw(Rx++Mu:brev)=Nt.new
+// memw(gp+#u16:2)=Nt.new
+// Word new-value store addressed by a global symbol plus an offset.
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_GP_nv_V4
+  : NVInst_V4<(outs),
+      (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+      "memw(#$global+$offset) = $src.new", []>,
+    Requires<[HasV4T]>;
+
+
+// Store new-value word conditionally.
+
+// if ([!]Pv[.new]) memw(#u6)=Nt.new
+
+// if ([!]Pv[.new]) memw(Rs+#u6:2)=Nt.new
+// if (Pv) memw(Rs+#u6:2)=Nt.new
+// Predicated word new-value stores using a MEMri address operand.
+// True sense, current predicate value.
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cPt_nv_V4
+  : NVInst_V4<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+      "if ($src1) memw($addr) = $src2.new", []>,
+    Requires<[HasV4T]>;
+
+// True sense, predicate read as .new: if (Pv.new) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cdnPt_nv_V4
+  : NVInst_V4<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+      "if ($src1.new) memw($addr) = $src2.new", []>,
+    Requires<[HasV4T]>;
+
+// False sense: if (!Pv) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cNotPt_nv_V4
+  : NVInst_V4<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+      "if (!$src1) memw($addr) = $src2.new", []>,
+    Requires<[HasV4T]>;
+
+// False sense, predicate read as .new: if (!Pv.new) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cdnNotPt_nv_V4
+  : NVInst_V4<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+      "if (!$src1.new) memw($addr) = $src2.new", []>,
+    Requires<[HasV4T]>;
+
+// if (Pv) memw(Rs+#u6:2)=Nt.new
+// Predicated word new-value stores with the base register and u6_2Imm offset
+// as separate operands.  True sense, current predicate value.
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cPt_nv_V4
+  : NVInst_V4<(outs),
+      (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+      "if ($src1) memw($src2+#$src3) = $src4.new", []>,
+    Requires<[HasV4T]>;
+
+// True sense, predicate read as .new: if (Pv.new) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cdnPt_nv_V4
+  : NVInst_V4<(outs),
+      (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+      "if ($src1.new) memw($src2+#$src3) = $src4.new", []>,
+    Requires<[HasV4T]>;
+
+// False sense: if (!Pv) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cNotPt_nv_V4
+  : NVInst_V4<(outs),
+      (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+      "if (!$src1) memw($src2+#$src3) = $src4.new", []>,
+    Requires<[HasV4T]>;
+
+// False sense, predicate read as .new: if (!Pv.new) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cdnNotPt_nv_V4
+  : NVInst_V4<(outs),
+      (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+      "if (!$src1.new) memw($src2+#$src3) = $src4.new", []>,
+    Requires<[HasV4T]>;
+
+
+// if ([!]Pv[.new]) memw(Rs+Ru<<#u2)=Nt.new
+// if (Pv) memw(Rs+Ru<<#u2)=Nt.new
+// Predicated word new-value stores addressed by base register plus shifted
+// index register.  True sense, current predicate value.
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_indexed_shl_cPt_nv_V4
+  : NVInst_V4<(outs),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+           IntRegs:$src5),
+      "if ($src1) memw($src2+$src3<<#$src4) = $src5.new", []>,
+    Requires<[HasV4T]>;
+
+// True sense, predicate read as .new: if (Pv.new) memw(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_indexed_shl_cdnPt_nv_V4
+  : NVInst_V4<(outs),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+           IntRegs:$src5),
+      "if ($src1.new) memw($src2+$src3<<#$src4) = $src5.new", []>,
+    Requires<[HasV4T]>;
+
+// False sense: if (!Pv) memw(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_indexed_shl_cNotPt_nv_V4
+  : NVInst_V4<(outs),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+           IntRegs:$src5),
+      "if (!$src1) memw($src2+$src3<<#$src4) = $src5.new", []>,
+    Requires<[HasV4T]>;
+
+// False sense, predicate read as .new: if (!Pv.new) memw(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_indexed_shl_cdnNotPt_nv_V4
+  : NVInst_V4<(outs),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+           IntRegs:$src5),
+      "if (!$src1.new) memw($src2+$src3<<#$src4) = $src5.new", []>,
+    Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memw(Rx++#s4:2)=Nt.new
+// if (Pv) memw(Rx++#s4:2)=Nt.new
+// Predicated post-increment word stores of a new-value register.  In every
+// variant the base register $src3 is tied to the result $dst.
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STwri_cPt_nv_V4
+  : NVInstPI_V4<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+      "if ($src1) memw($src3++#$offset) = $src2.new", [], "$src3 = $dst">,
+    Requires<[HasV4T]>;
+
+// True sense, predicate read as .new: if (Pv.new) memw(Rx++#s4:2)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STwri_cdnPt_nv_V4
+  : NVInstPI_V4<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+      "if ($src1.new) memw($src3++#$offset) = $src2.new", [], "$src3 = $dst">,
+    Requires<[HasV4T]>;
+
+// False sense: if (!Pv) memw(Rx++#s4:2)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STwri_cNotPt_nv_V4
+  : NVInstPI_V4<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+      "if (!$src1) memw($src3++#$offset) = $src2.new", [], "$src3 = $dst">,
+    Requires<[HasV4T]>;
+
+// False sense, predicate read as .new: if (!Pv.new) memw(Rx++#s4:2)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STwri_cdnNotPt_nv_V4
+  : NVInstPI_V4<(outs IntRegs:$dst),
+      (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+      "if (!$src1.new) memw($src3++#$offset) = $src2.new", [], "$src3 = $dst">,
+    Requires<[HasV4T]>;
+
+
+//===----------------------------------------------------------------------===//
+// NV/ST -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/ALU +
+//===----------------------------------------------------------------------===//
+
+// Add and accumulate.
+// Rd=add(Rs,add(Ru,#s6))
+// Selects $src1 + ($src2 + #s6).
+def ADDr_ADDri_V4
+  : MInst<(outs IntRegs:$dst),
+      (ins IntRegs:$src1, IntRegs:$src2, s6Imm:$src3),
+      "$dst = add($src1, add($src2, #$src3))",
+      [(set IntRegs:$dst,
+            (add IntRegs:$src1, (add IntRegs:$src2, s6ImmPred:$src3)))]>,
+    Requires<[HasV4T]>;
+
+// Rd=add(Rs,sub(#s6,Ru))
+// Selects $src1 + (#s6 - $src3).
+def ADDr_SUBri_V4
+  : MInst<(outs IntRegs:$dst),
+      (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
+      "$dst = add($src1, sub(#$src2, $src3))",
+      [(set IntRegs:$dst,
+            (add IntRegs:$src1, (sub s6ImmPred:$src2, IntRegs:$src3)))]>,
+    Requires<[HasV4T]>;
+
+// Rd=add(Rs,sub(#s6,Ru))
+// Prints the same assembly as ADDr_SUBri_V4 but is selected from the
+// re-associated form ($src1 + #s6) - $src3.
+def ADDri_SUBr_V4
+  : MInst<(outs IntRegs:$dst),
+      (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
+      "$dst = add($src1, sub(#$src2, $src3))",
+      [(set IntRegs:$dst,
+            (sub (add IntRegs:$src1, s6ImmPred:$src2), IntRegs:$src3))]>,
+    Requires<[HasV4T]>;
+
+
+// Add or subtract doublewords with carry.
+//TODO:
+// Rdd=add(Rss,Rtt,Px):carry
+//TODO:
+// Rdd=sub(Rss,Rtt,Px):carry
+
+
+// Logical doublewords.
+// Rdd=and(Rtt,~Rss)
+// 64-bit AND of $src1 with the complement of $src2.
+def ANDd_NOTd_V4
+  : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+      "$dst = and($src1, ~$src2)",
+      [(set DoubleRegs:$dst,
+            (and DoubleRegs:$src1, (not DoubleRegs:$src2)))]>,
+    Requires<[HasV4T]>;
+
+// Rdd=or(Rtt,~Rss)
+// 64-bit OR of $src1 with the complement of $src2.
+def ORd_NOTd_V4
+  : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+      "$dst = or($src1, ~$src2)",
+      [(set DoubleRegs:$dst,
+            (or DoubleRegs:$src1, (not DoubleRegs:$src2)))]>,
+    Requires<[HasV4T]>;
+
+
+// Logical-logical doublewords.
+// Rxx^=xor(Rss,Rtt)
+// 64-bit accumulating XOR: $src1 ^ ($src2 ^ $src3), with $src1 tied to $dst.
+def XORd_XORdd
+  : MInst_acc<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+      "$dst ^= xor($src2, $src3)",
+      [(set DoubleRegs:$dst,
+            (xor DoubleRegs:$src1,
+                 (xor DoubleRegs:$src2, DoubleRegs:$src3)))],
+      "$src1 = $dst">,
+    Requires<[HasV4T]>;
+
+
+// Logical-logical words.
+// Rx=or(Ru,and(Rx,#s10))
+// Selects $src1 | ($src2 & #s10); here the AND input $src2 is the operand
+// tied to $dst.
+// NOTE(review): the selection pattern is the same as ORr_ANDri2_V4's; only
+// the tied operand differs -- confirm which one isel is meant to choose.
+def ORr_ANDri_V4
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$src1, IntRegs:$src2, s10Imm:$src3),
+      "$dst = or($src1, and($src2, #$src3))",
+      [(set IntRegs:$dst,
+            (or IntRegs:$src1, (and IntRegs:$src2, s10ImmPred:$src3)))],
+      "$src2 = $dst">,
+    Requires<[HasV4T]>;
+
+// Rx[&|^]=and(Rs,Rt)
+// Rx&=and(Rs,Rt)
+// Accumulate and($src2, $src3) into $src1 (tied to $dst) with AND.
+def ANDr_ANDrr_V4
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "$dst &= and($src2, $src3)",
+      [(set IntRegs:$dst,
+            (and IntRegs:$src1, (and IntRegs:$src2, IntRegs:$src3)))],
+      "$src1 = $dst">,
+    Requires<[HasV4T]>;
+
+// Rx|=and(Rs,Rt)
+// Same operands, accumulated with OR.
+def ORr_ANDrr_V4
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "$dst |= and($src2, $src3)",
+      [(set IntRegs:$dst,
+            (or IntRegs:$src1, (and IntRegs:$src2, IntRegs:$src3)))],
+      "$src1 = $dst">,
+    Requires<[HasV4T]>;
+
+// Rx^=and(Rs,Rt)
+// Same operands, accumulated with XOR.
+def XORr_ANDrr_V4
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "$dst ^= and($src2, $src3)",
+      [(set IntRegs:$dst,
+            (xor IntRegs:$src1, (and IntRegs:$src2, IntRegs:$src3)))],
+      "$src1 = $dst">,
+    Requires<[HasV4T]>;
+
+// Rx[&|^]=and(Rs,~Rt)
+// Rx&=and(Rs,~Rt)
+// Accumulate and($src2, ~$src3) into $src1 (tied to $dst) with AND.
+def ANDr_ANDr_NOTr_V4
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "$dst &= and($src2, ~$src3)",
+      [(set IntRegs:$dst,
+            (and IntRegs:$src1,
+                 (and IntRegs:$src2, (not IntRegs:$src3))))],
+      "$src1 = $dst">,
+    Requires<[HasV4T]>;
+
+// Rx|=and(Rs,~Rt)
+// Same operands, accumulated with OR.
+def ORr_ANDr_NOTr_V4
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "$dst |= and($src2, ~$src3)",
+      [(set IntRegs:$dst,
+            (or IntRegs:$src1,
+                (and IntRegs:$src2, (not IntRegs:$src3))))],
+      "$src1 = $dst">,
+    Requires<[HasV4T]>;
+
+// Rx^=and(Rs,~Rt)
+// Same operands, accumulated with XOR.
+def XORr_ANDr_NOTr_V4
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "$dst ^= and($src2, ~$src3)",
+      [(set IntRegs:$dst,
+            (xor IntRegs:$src1,
+                 (and IntRegs:$src2, (not IntRegs:$src3))))],
+      "$src1 = $dst">,
+    Requires<[HasV4T]>;
+
+// Rx[&|^]=or(Rs,Rt)
+// Rx&=or(Rs,Rt)
+// Accumulate or($src2, $src3) into $src1 (tied to $dst) with AND.
+def ANDr_ORrr_V4
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "$dst &= or($src2, $src3)",
+      [(set IntRegs:$dst,
+            (and IntRegs:$src1, (or IntRegs:$src2, IntRegs:$src3)))],
+      "$src1 = $dst">,
+    Requires<[HasV4T]>;
+
+// Rx|=or(Rs,Rt)
+// Same operands, accumulated with OR.
+def ORr_ORrr_V4
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "$dst |= or($src2, $src3)",
+      [(set IntRegs:$dst,
+            (or IntRegs:$src1, (or IntRegs:$src2, IntRegs:$src3)))],
+      "$src1 = $dst">,
+    Requires<[HasV4T]>;
+
+// Rx^=or(Rs,Rt)
+// Same operands, accumulated with XOR.
+def XORr_ORrr_V4
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "$dst ^= or($src2, $src3)",
+      [(set IntRegs:$dst,
+            (xor IntRegs:$src1, (or IntRegs:$src2, IntRegs:$src3)))],
+      "$src1 = $dst">,
+    Requires<[HasV4T]>;
+
+// Rx[&|^]=xor(Rs,Rt)
+// Rx&=xor(Rs,Rt)
+// Accumulate xor($src2, $src3) into $src1 (tied to $dst) with AND.
+def ANDr_XORrr_V4
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "$dst &= xor($src2, $src3)",
+      [(set IntRegs:$dst,
+            (and IntRegs:$src1, (xor IntRegs:$src2, IntRegs:$src3)))],
+      "$src1 = $dst">,
+    Requires<[HasV4T]>;
+
+// Rx|=xor(Rs,Rt)
+// Accumulate xor($src2, $src3) into $src1 (tied to $dst) with OR.
+// The outer node of the selection pattern must be `or` to agree with the
+// "|=" in the assembly string; it previously duplicated the `and` pattern of
+// ANDr_XORrr_V4.
+def ORr_XORrr_V4
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "$dst |= xor($src2, $src3)",
+      [(set IntRegs:$dst,
+            (or IntRegs:$src1, (xor IntRegs:$src2, IntRegs:$src3)))],
+      "$src1 = $dst">,
+    Requires<[HasV4T]>;
+
+// Rx^=xor(Rs,Rt)
+// Accumulate xor($src2, $src3) into $src1 (tied to $dst) with XOR.
+// The outer node of the selection pattern must be `xor` to agree with the
+// "^=" in the assembly string; it previously duplicated the `and` pattern of
+// ANDr_XORrr_V4.
+def XORr_XORrr_V4
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "$dst ^= xor($src2, $src3)",
+      [(set IntRegs:$dst,
+            (xor IntRegs:$src1, (xor IntRegs:$src2, IntRegs:$src3)))],
+      "$src1 = $dst">,
+    Requires<[HasV4T]>;
+
+// Rx|=and(Rs,#s10)
+// Accumulate and($src2, #s10) into $src1 (tied to $dst) with OR.
+def ORr_ANDri2_V4
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$src1, IntRegs:$src2, s10Imm:$src3),
+      "$dst |= and($src2, #$src3)",
+      [(set IntRegs:$dst,
+            (or IntRegs:$src1, (and IntRegs:$src2, s10ImmPred:$src3)))],
+      "$src1 = $dst">,
+    Requires<[HasV4T]>;
+
+// Rx|=or(Rs,#s10)
+// Accumulate or($src2, #s10) into $src1 (tied to $dst) with OR.
+// The inner node of the selection pattern must be `or` to agree with the
+// "or($src2, #$src3)" in the assembly string; it previously duplicated the
+// `and` pattern of ORr_ANDri2_V4.
+def ORr_ORri_V4
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$src1, IntRegs:$src2, s10Imm:$src3),
+      "$dst |= or($src2, #$src3)",
+      [(set IntRegs:$dst,
+            (or IntRegs:$src1, (or IntRegs:$src2, s10ImmPred:$src3)))],
+      "$src1 = $dst">,
+    Requires<[HasV4T]>;
+
+
+// Modulo wrap
+// Rd=modwrap(Rs,Rt)
+// Round
+// Rd=cround(Rs,#u5)
+// Rd=cround(Rs,Rt)
+// Rd=round(Rs,#u5)[:sat]
+// Rd=round(Rs,Rt)[:sat]
+// Vector reduce add unsigned halfwords
+// Rd=vraddh(Rss,Rtt)
+// Vector add bytes
+// Rdd=vaddb(Rss,Rtt)
+// Vector conditional negate
+// Rdd=vcnegh(Rss,Rt)
+// Rxx+=vrcnegh(Rss,Rt)
+// Vector maximum bytes
+// Rdd=vmaxb(Rtt,Rss)
+// Vector reduce maximum halfwords
+// Rxx=vrmaxh(Rss,Ru)
+// Rxx=vrmaxuh(Rss,Ru)
+// Vector reduce maximum words
+// Rxx=vrmaxuw(Rss,Ru)
+// Rxx=vrmaxw(Rss,Ru)
+// Vector minimum bytes
+// Rdd=vminb(Rtt,Rss)
+// Vector reduce minimum halfwords
+// Rxx=vrminh(Rss,Ru)
+// Rxx=vrminuh(Rss,Ru)
+// Vector reduce minimum words
+// Rxx=vrminuw(Rss,Ru)
+// Rxx=vrminw(Rss,Ru)
+// Vector subtract bytes
+// Rdd=vsubb(Rss,Rtt)
+
+//===----------------------------------------------------------------------===//
+// XTYPE/ALU -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/MPY +
+//===----------------------------------------------------------------------===//
+
+// Multiply and use lower result.
+// Rd=add(#u6,mpyi(Rs,#U6))
+// Selects ($src2 * #u6) + #u6.
+def ADDi_MPYri_V4
+  : MInst<(outs IntRegs:$dst),
+      (ins u6Imm:$src1, IntRegs:$src2, u6Imm:$src3),
+      "$dst = add(#$src1, mpyi($src2, #$src3))",
+      [(set IntRegs:$dst,
+            (add (mul IntRegs:$src2, u6ImmPred:$src3), u6ImmPred:$src1))]>,
+    Requires<[HasV4T]>;
+
+// Rd=add(#u6,mpyi(Rs,Rt))
+// Selects ($src2 * $src3) + #u6.
+def ADDi_MPYrr_V4
+  : MInst<(outs IntRegs:$dst),
+      (ins u6Imm:$src1, IntRegs:$src2, IntRegs:$src3),
+      "$dst = add(#$src1, mpyi($src2, $src3))",
+      [(set IntRegs:$dst,
+            (add (mul IntRegs:$src2, IntRegs:$src3), u6ImmPred:$src1))]>,
+    Requires<[HasV4T]>;
+
+// Rd=add(Ru,mpyi(#u6:2,Rs))
+// Selects $src1 + ($src3 * #imm).
+// NOTE(review): the operand is declared u6Imm while the pattern matches
+// u6_2ImmPred -- confirm the operand class is intentional.
+def ADDr_MPYir_V4
+  : MInst<(outs IntRegs:$dst),
+      (ins IntRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+      "$dst = add($src1, mpyi(#$src2, $src3))",
+      [(set IntRegs:$dst,
+            (add IntRegs:$src1, (mul IntRegs:$src3, u6_2ImmPred:$src2)))]>,
+    Requires<[HasV4T]>;
+
+// Rd=add(Ru,mpyi(Rs,#u6))
+// Selects $src1 + ($src2 * #u6).
+def ADDr_MPYri_V4
+  : MInst<(outs IntRegs:$dst),
+      (ins IntRegs:$src1, IntRegs:$src2, u6Imm:$src3),
+      "$dst = add($src1, mpyi($src2, #$src3))",
+      [(set IntRegs:$dst,
+            (add IntRegs:$src1, (mul IntRegs:$src2, u6ImmPred:$src3)))]>,
+    Requires<[HasV4T]>;
+
+// Rx=add(Ru,mpyi(Rx,Rs))
+// Selects $src1 + ($src2 * $src3); the multiplicand $src2 is tied to $dst.
+def ADDr_MPYrr_V4
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      "$dst = add($src1, mpyi($src2, $src3))",
+      [(set IntRegs:$dst,
+            (add IntRegs:$src1, (mul IntRegs:$src2, IntRegs:$src3)))],
+      "$src2 = $dst">,
+    Requires<[HasV4T]>;
+
+
+// Polynomial multiply words
+// Rdd=pmpyw(Rs,Rt)
+// Rxx^=pmpyw(Rs,Rt)
+
+// Vector reduce multiply word by signed half (32x16)
+// Rdd=vrmpyweh(Rss,Rtt)[:<<1]
+// Rdd=vrmpywoh(Rss,Rtt)[:<<1]
+// Rxx+=vrmpyweh(Rss,Rtt)[:<<1]
+// Rxx+=vrmpywoh(Rss,Rtt)[:<<1]
+
+// Multiply and use upper result
+// Rd=mpy(Rs,Rt.H):<<1:sat
+// Rd=mpy(Rs,Rt.L):<<1:sat
+// Rd=mpy(Rs,Rt):<<1
+// Rd=mpy(Rs,Rt):<<1:sat
+// Rd=mpysu(Rs,Rt)
+// Rx+=mpy(Rs,Rt):<<1:sat
+// Rx-=mpy(Rs,Rt):<<1:sat
+
+// Vector multiply bytes
+// Rdd=vmpybsu(Rs,Rt)
+// Rdd=vmpybu(Rs,Rt)
+// Rxx+=vmpybsu(Rs,Rt)
+// Rxx+=vmpybu(Rs,Rt)
+
+// Vector polynomial multiply halfwords
+// Rdd=vpmpyh(Rs,Rt)
+// Rxx^=vpmpyh(Rs,Rt)
+
+//===----------------------------------------------------------------------===//
+// XTYPE/MPY -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/SHIFT +
+//===----------------------------------------------------------------------===//
+
+// Shift by immediate and accumulate.
+// Rx=add(#u8,asl(Rx,#U5))
+// Selects ($src2 << #u5) + #u8; the shifted register $src2 is tied to $dst.
+def ADDi_ASLri_V4
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+      "$dst = add(#$src1, asl($src2, #$src3))",
+      [(set IntRegs:$dst,
+            (add (shl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+      "$src2 = $dst">,
+    Requires<[HasV4T]>;
+
+// Rx=add(#u8,lsr(Rx,#U5))
+// Logical-shift-right counterpart of ADDi_ASLri_V4.
+def ADDi_LSRri_V4
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+      "$dst = add(#$src1, lsr($src2, #$src3))",
+      [(set IntRegs:$dst,
+            (add (srl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+      "$src2 = $dst">,
+    Requires<[HasV4T]>;
+
+// Rx=sub(#u8,asl(Rx,#U5))
+// Selects #u8 - ($src2 << #u5); the shifted register $src2 is tied to $dst.
+// `sub` is not commutative, so the immediate must be the first operand of
+// the pattern to agree with "sub(#$src1, asl(...))" in the assembly string;
+// the pattern previously matched (shift - imm).
+def SUBi_ASLri_V4
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+      "$dst = sub(#$src1, asl($src2, #$src3))",
+      [(set IntRegs:$dst,
+            (sub u8ImmPred:$src1, (shl IntRegs:$src2, u5ImmPred:$src3)))],
+      "$src2 = $dst">,
+    Requires<[HasV4T]>;
+
+// Rx=sub(#u8,lsr(Rx,#U5))
+// Selects #u8 - ($src2 >> #u5) using a logical shift; $src2 is tied to $dst.
+// `sub` is not commutative, so the immediate must be the first operand of
+// the pattern to agree with "sub(#$src1, lsr(...))" in the assembly string;
+// the pattern previously matched (shift - imm).
+def SUBi_LSRri_V4
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+      "$dst = sub(#$src1, lsr($src2, #$src3))",
+      [(set IntRegs:$dst,
+            (sub u8ImmPred:$src1, (srl IntRegs:$src2, u5ImmPred:$src3)))],
+      "$src2 = $dst">,
+    Requires<[HasV4T]>;
+
+
+//Shift by immediate and logical.
+//Rx=and(#u8,asl(Rx,#U5))
+// Selects ($src2 << #u5) & #u8; the shifted register $src2 is tied to $dst.
+def ANDi_ASLri_V4
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+      "$dst = and(#$src1, asl($src2, #$src3))",
+      [(set IntRegs:$dst,
+            (and (shl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+      "$src2 = $dst">,
+    Requires<[HasV4T]>;
+
+//Rx=and(#u8,lsr(Rx,#U5))
+// Logical-shift-right counterpart of ANDi_ASLri_V4.
+def ANDi_LSRri_V4
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+      "$dst = and(#$src1, lsr($src2, #$src3))",
+      [(set IntRegs:$dst,
+            (and (srl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+      "$src2 = $dst">,
+    Requires<[HasV4T]>;
+
+//Rx=or(#u8,asl(Rx,#U5))
+// Selects ($src2 << #u5) | #u8; $src2 is tied to $dst.
+def ORi_ASLri_V4
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+      "$dst = or(#$src1, asl($src2, #$src3))",
+      [(set IntRegs:$dst,
+            (or (shl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+      "$src2 = $dst">,
+    Requires<[HasV4T]>;
+
+//Rx=or(#u8,lsr(Rx,#U5))
+// Logical-shift-right counterpart of ORi_ASLri_V4.
+def ORi_LSRri_V4
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+      "$dst = or(#$src1, lsr($src2, #$src3))",
+      [(set IntRegs:$dst,
+            (or (srl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+      "$src2 = $dst">,
+    Requires<[HasV4T]>;
+
+
+//Shift by register.
+//Rd=lsl(#s6,Rt)
+// Shifts the s6 immediate $src1 left by the register amount $src2.
+def LSLi_V4
+  : MInst<(outs IntRegs:$dst), (ins s6Imm:$src1, IntRegs:$src2),
+      "$dst = lsl(#$src1, $src2)",
+      [(set IntRegs:$dst, (shl s6ImmPred:$src1, IntRegs:$src2))]>,
+    Requires<[HasV4T]>;
+
+
+//Shift by register and logical.
+//Rxx^=asl(Rss,Rt)
+// XOR-accumulate a 64-bit register shifted by a register amount into $src1,
+// which is tied to $dst.  This variant uses shl.
+def ASLd_rr_xor_V4
+  : MInst_acc<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+      "$dst ^= asl($src2, $src3)",
+      [(set DoubleRegs:$dst,
+            (xor DoubleRegs:$src1, (shl DoubleRegs:$src2, IntRegs:$src3)))],
+      "$src1 = $dst">,
+    Requires<[HasV4T]>;
+
+//Rxx^=asr(Rss,Rt)
+// Arithmetic shift right (sra) variant.
+def ASRd_rr_xor_V4
+  : MInst_acc<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+      "$dst ^= asr($src2, $src3)",
+      [(set DoubleRegs:$dst,
+            (xor DoubleRegs:$src1, (sra DoubleRegs:$src2, IntRegs:$src3)))],
+      "$src1 = $dst">,
+    Requires<[HasV4T]>;
+
+//Rxx^=lsl(Rss,Rt)
+// NOTE(review): the selection pattern is identical to ASLd_rr_xor_V4's (both
+// use shl); only the printed mnemonic differs.
+def LSLd_rr_xor_V4
+  : MInst_acc<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+      "$dst ^= lsl($src2, $src3)",
+      [(set DoubleRegs:$dst,
+            (xor DoubleRegs:$src1, (shl DoubleRegs:$src2, IntRegs:$src3)))],
+      "$src1 = $dst">,
+    Requires<[HasV4T]>;
+
+//Rxx^=lsr(Rss,Rt)
+// Logical shift right (srl) variant.
+def LSRd_rr_xor_V4
+  : MInst_acc<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+      "$dst ^= lsr($src2, $src3)",
+      [(set DoubleRegs:$dst,
+            (xor DoubleRegs:$src1, (srl DoubleRegs:$src2, IntRegs:$src3)))],
+      "$src1 = $dst">,
+    Requires<[HasV4T]>;
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/SHIFT -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MEMOP: Word, Half, Byte
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MEMOP: Word
+//
+// Implemented:
+// MEMw_ADDi_indexed_V4 : memw(Rs+#u6:2)+=#U5
+// MEMw_SUBi_indexed_V4 : memw(Rs+#u6:2)-=#U5
+// MEMw_ADDr_indexed_V4 : memw(Rs+#u6:2)+=Rt
+// MEMw_SUBr_indexed_V4 : memw(Rs+#u6:2)-=Rt
+// MEMw_CLRr_indexed_V4 : memw(Rs+#u6:2)&=Rt
+// MEMw_SETr_indexed_V4 : memw(Rs+#u6:2)|=Rt
+// MEMw_ADDi_V4 : memw(Rs+#u6:2)+=#U5
+// MEMw_SUBi_V4 : memw(Rs+#u6:2)-=#U5
+// MEMw_ADDr_V4 : memw(Rs+#u6:2)+=Rt
+// MEMw_SUBr_V4 : memw(Rs+#u6:2)-=Rt
+// MEMw_CLRr_V4 : memw(Rs+#u6:2)&=Rt
+// MEMw_SETr_V4 : memw(Rs+#u6:2)|=Rt
+//
+// Not implemented:
+// MEMw_CLRi_indexed_V4 : memw(Rs+#u6:2)=clrbit(#U5)
+// MEMw_SETi_indexed_V4 : memw(Rs+#u6:2)=setbit(#U5)
+// MEMw_CLRi_V4 : memw(Rs+#u6:2)=clrbit(#U5)
+// MEMw_SETi_V4 : memw(Rs+#u6:2)=setbit(#U5)
+//===----------------------------------------------------------------------===//
+
+
+// MEMw_ADDSUBi_indexed_V4:
+// pseudo operation for MEMw_ADDi_indexed_V4 and
+// MEMw_SUBi_indexed_V4 a later pass will change it
+// to the corresponding pattern.
+// Matches a word load / add-immediate / store back to the same base+offset
+// address.  The assembly string is a placeholder that is not meant to be
+// emitted.
+let AddedComplexity = 30 in
+def MEMw_ADDSUBi_indexed_MEM_V4
+  : MEMInst_V4<(outs),
+      (ins IntRegs:$base, u6_2Imm:$offset, m6Imm:$addend),
+      "Error; should not emit",
+      [(store (add (load (add IntRegs:$base, u6_2ImmPred:$offset)),
+                   m6ImmPred:$addend),
+              (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+    Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) += #U5
+// Word memop that adds a u5 immediate to memory at $base+#$offset.  It has
+// no selection pattern of its own.
+// The addend is an immediate, so it is printed with a leading '#', matching
+// the "+= #U5" syntax comment and the '#' already used for $offset.
+let AddedComplexity = 30 in
+def MEMw_ADDi_indexed_MEM_V4
+  : MEMInst_V4<(outs),
+      (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$addend),
+      "memw($base+#$offset) += #$addend", []>,
+    Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) -= #U5
+// Word memop that subtracts a u5 immediate from memory at $base+#$offset.
+// It has no selection pattern of its own.
+// The subtrahend is an immediate, so it is printed with a leading '#',
+// matching the "-= #U5" syntax comment and the '#' already used for $offset.
+let AddedComplexity = 30 in
+def MEMw_SUBi_indexed_MEM_V4
+  : MEMInst_V4<(outs),
+      (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$subend),
+      "memw($base+#$offset) -= #$subend", []>,
+    Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) += Rt
+// Matches a word load / add-register / store back to the same base+offset
+// address.
+let AddedComplexity = 30 in
+def MEMw_ADDr_indexed_MEM_V4
+  : MEMInst_V4<(outs),
+      (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$addend),
+      "memw($base+#$offset) += $addend",
+      [(store (add (load (add IntRegs:$base, u6_2ImmPred:$offset)),
+                   IntRegs:$addend),
+              (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+    Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) -= Rt
+// Subtracting counterpart of MEMw_ADDr_indexed_MEM_V4.
+let AddedComplexity = 30 in
+def MEMw_SUBr_indexed_MEM_V4
+  : MEMInst_V4<(outs),
+      (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$subend),
+      "memw($base+#$offset) -= $subend",
+      [(store (sub (load (add IntRegs:$base, u6_2ImmPred:$offset)),
+                   IntRegs:$subend),
+              (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+    Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) &= Rt
+// Matches a word load / and-register / store back to the same base+offset
+// address.
+// The assembly string must print "&=" to agree with the `and` in the
+// selection pattern; it previously printed "+=", which is the add memop.
+let AddedComplexity = 30 in
+def MEMw_ANDr_indexed_MEM_V4
+  : MEMInst_V4<(outs),
+      (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$andend),
+      "memw($base+#$offset) &= $andend",
+      [(store (and (load (add IntRegs:$base, u6_2ImmPred:$offset)),
+                   IntRegs:$andend),
+              (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+    Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) |= Rt
+// Matches a word load / or-register / store back to the same base+offset
+// address.
+let AddedComplexity = 30 in
+def MEMw_ORr_indexed_MEM_V4
+  : MEMInst_V4<(outs),
+      (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$orend),
+      "memw($base+#$offset) |= $orend",
+      [(store (or (load (add IntRegs:$base, u6_2ImmPred:$offset)),
+                  IntRegs:$orend),
+              (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+    Requires<[HasV4T, UseMEMOP]>;
+
+// MEMw_ADDSUBi_V4:
+// Pseudo operation for MEMw_ADDi_V4 and MEMw_SUBi_V4
+// a later pass will change it to the right pattern.
+// Matches a word load / add-immediate / store back through the same
+// ADDRriU6_2 address.  The assembly string is a placeholder that is not
+// meant to be emitted.
+let AddedComplexity = 30 in
+def MEMw_ADDSUBi_MEM_V4
+  : MEMInst_V4<(outs), (ins MEMri:$addr, m6Imm:$addend),
+      "Error; should not emit",
+      [(store (add (load ADDRriU6_2:$addr), m6ImmPred:$addend),
+              ADDRriU6_2:$addr)]>,
+    Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) += #U5
+// Word memop that adds a u5 immediate to memory at $addr.  It has no
+// selection pattern of its own.
+// The addend is an immediate, so it is printed with a leading '#', matching
+// the "+= #U5" syntax comment.
+let AddedComplexity = 30 in
+def MEMw_ADDi_MEM_V4
+  : MEMInst_V4<(outs), (ins MEMri:$addr, u5Imm:$addend),
+      "memw($addr) += #$addend", []>,
+    Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) -= #U5
+// Word memop that subtracts a u5 immediate from memory at $addr.  It has no
+// selection pattern of its own.
+// The subtrahend is an immediate, so it is printed with a leading '#',
+// matching the "-= #U5" syntax comment.
+let AddedComplexity = 30 in
+def MEMw_SUBi_MEM_V4
+  : MEMInst_V4<(outs), (ins MEMri:$addr, u5Imm:$subend),
+      "memw($addr) -= #$subend", []>,
+    Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) += Rt
+// Register-operand word memops on a MEMri address.  Each matches a load, an
+// ALU operation with a register, and a store back through the same
+// ADDRriU6_2 address.
+let AddedComplexity = 30 in
+def MEMw_ADDr_MEM_V4
+  : MEMInst_V4<(outs), (ins MEMri:$addr, IntRegs:$addend),
+      "memw($addr) += $addend",
+      [(store (add (load ADDRriU6_2:$addr), IntRegs:$addend),
+              ADDRriU6_2:$addr)]>,
+    Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) -= Rt
+let AddedComplexity = 30 in
+def MEMw_SUBr_MEM_V4
+  : MEMInst_V4<(outs), (ins MEMri:$addr, IntRegs:$subend),
+      "memw($addr) -= $subend",
+      [(store (sub (load ADDRriU6_2:$addr), IntRegs:$subend),
+              ADDRriU6_2:$addr)]>,
+    Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) &= Rt
+let AddedComplexity = 30 in
+def MEMw_ANDr_MEM_V4
+  : MEMInst_V4<(outs), (ins MEMri:$addr, IntRegs:$andend),
+      "memw($addr) &= $andend",
+      [(store (and (load ADDRriU6_2:$addr), IntRegs:$andend),
+              ADDRriU6_2:$addr)]>,
+    Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) |= Rt
+let AddedComplexity = 30 in
+def MEMw_ORr_MEM_V4
+  : MEMInst_V4<(outs), (ins MEMri:$addr, IntRegs:$orend),
+      "memw($addr) |= $orend",
+      [(store (or (load ADDRriU6_2:$addr), IntRegs:$orend),
+              ADDRriU6_2:$addr)]>,
+    Requires<[HasV4T, UseMEMOP]>;
+
+//===----------------------------------------------------------------------===//
+// MEMOP: Halfword
+//
+// Implemented:
+// MEMh_ADDi_indexed_V4 : memh(Rs+#u6:1)+=#U5
+// MEMh_SUBi_indexed_V4 : memh(Rs+#u6:1)-=#U5
+// MEMh_ADDr_indexed_V4 : memh(Rs+#u6:1)+=Rt
+// MEMh_SUBr_indexed_V4 : memh(Rs+#u6:1)-=Rt
+// MEMh_CLRr_indexed_V4 : memh(Rs+#u6:1)&=Rt
+// MEMh_SETr_indexed_V4 : memh(Rs+#u6:1)|=Rt
+// MEMh_ADDi_V4 : memh(Rs+#u6:1)+=#U5
+// MEMh_SUBi_V4 : memh(Rs+#u6:1)-=#U5
+// MEMh_ADDr_V4 : memh(Rs+#u6:1)+=Rt
+// MEMh_SUBr_V4 : memh(Rs+#u6:1)-=Rt
+// MEMh_CLRr_V4 : memh(Rs+#u6:1)&=Rt
+// MEMh_SETr_V4 : memh(Rs+#u6:1)|=Rt
+//
+// Not implemented:
+// MEMh_CLRi_indexed_V4 : memh(Rs+#u6:1)=clrbit(#U5)
+// MEMh_SETi_indexed_V4 : memh(Rs+#u6:1)=setbit(#U5)
+// MEMh_CLRi_V4 : memh(Rs+#u6:1)=clrbit(#U5)
+// MEMh_SETi_V4 : memh(Rs+#u6:1)=setbit(#U5)
+//===----------------------------------------------------------------------===//
+
+
+// MEMh_ADDSUBi_indexed_V4:
+// Pseudo operation for MEMh_ADDi_indexed_V4 and
+// MEMh_SUBi_indexed_V4; a later pass will change it
+// to the corresponding pattern.
+// Matches truncstorei16(add(sextloadi16(base+offset), imm)) back to the same
+// base+offset, with imm constrained by m6ImmPred and the offset by
+// u6_1ImmPred. The assembly string is a placeholder.
+let AddedComplexity = 30 in
+def MEMh_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, m6Imm:$addend),
+ "Error; should not emit",
+ [(truncstorei16 (add (sextloadi16 (add IntRegs:$base,
+ u6_1ImmPred:$offset)),
+ m6ImmPred:$addend),
+ (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) += #U5
+// Explicit base + offset operands with a u5Imm addend. The pattern list is
+// empty, so this record is not selected directly (see the
+// MEMh_ADDSUBi_indexed_MEM_V4 pseudo, which a later pass rewrites).
+let AddedComplexity = 30 in
+def MEMh_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, u5Imm:$addend),
+ "memh($base+#$offset) += $addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) -= #U5
+// Explicit base + offset operands with a u5Imm subtrahend. The pattern list
+// is empty, so this record is not selected directly (see the
+// MEMh_ADDSUBi_indexed_MEM_V4 pseudo, which a later pass rewrites).
+let AddedComplexity = 30 in
+def MEMh_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, u5Imm:$subend),
+ "memh($base+#$offset) -= $subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) += Rt
+// Matches truncstorei16(add(sextloadi16(base+offset), Rt)) back to the same
+// base+offset; the offset must satisfy u6_1ImmPred.
+let AddedComplexity = 30 in
+def MEMh_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$addend),
+ "memh($base+#$offset) += $addend",
+ [(truncstorei16 (add (sextloadi16 (add IntRegs:$base,
+ u6_1ImmPred:$offset)),
+ IntRegs:$addend),
+ (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) -= Rt
+// Matches truncstorei16(sub(sextloadi16(base+offset), Rt)) back to the same
+// base+offset; the offset must satisfy u6_1ImmPred.
+let AddedComplexity = 30 in
+def MEMh_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$subend),
+ "memh($base+#$offset) -= $subend",
+ [(truncstorei16 (sub (sextloadi16 (add IntRegs:$base,
+ u6_1ImmPred:$offset)),
+ IntRegs:$subend),
+ (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) &= Rt
+// Halfword AND memop with explicit base register and offset. Matches
+// truncstorei16(and(sextloadi16(base+offset), Rt)) back to the same
+// base+offset. The assembly string must use "&=" so that the emitted text
+// agrees with the (and ...) pattern; "+=" would print an add memop instead.
+let AddedComplexity = 30 in
+def MEMh_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$andend),
+ "memh($base+#$offset) &= $andend",
+ [(truncstorei16 (and (sextloadi16 (add IntRegs:$base,
+ u6_1ImmPred:$offset)),
+ IntRegs:$andend),
+ (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) |= Rt
+// Matches truncstorei16(or(sextloadi16(base+offset), Rt)) back to the same
+// base+offset; the offset must satisfy u6_1ImmPred.
+let AddedComplexity = 30 in
+def MEMh_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$orend),
+ "memh($base+#$offset) |= $orend",
+ [(truncstorei16 (or (sextloadi16 (add IntRegs:$base,
+ u6_1ImmPred:$offset)),
+ IntRegs:$orend),
+ (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// MEMh_ADDSUBi_V4:
+// Pseudo operation for MEMh_ADDi_V4 and MEMh_SUBi_V4;
+// a later pass will change it to the right pattern.
+// Matches truncstorei16(add(sextloadi16 addr, imm)) back to the same
+// ADDRriU6_1 address, with imm constrained by m6ImmPred. The assembly
+// string is a placeholder.
+let AddedComplexity = 30 in
+def MEMh_ADDSUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, m6Imm:$addend),
+ "Error; should not emit",
+ [(truncstorei16 (add (sextloadi16 ADDRriU6_1:$addr),
+ m6ImmPred:$addend), ADDRriU6_1:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) += #U5
+// Address is a single MEMri operand; the addend is a u5Imm. The pattern list
+// is empty, so this record is not selected directly (see the
+// MEMh_ADDSUBi_MEM_V4 pseudo, which a later pass rewrites).
+let AddedComplexity = 30 in
+def MEMh_ADDi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$addend),
+ "memh($addr) += $addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) -= #U5
+// Address is a single MEMri operand; the subtrahend is a u5Imm. The pattern
+// list is empty, so this record is not selected directly (see the
+// MEMh_ADDSUBi_MEM_V4 pseudo, which a later pass rewrites).
+let AddedComplexity = 30 in
+def MEMh_SUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$subend),
+ "memh($addr) -= $subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) += Rt
+// Matches truncstorei16(add(sextloadi16 addr, Rt)) where the load and the
+// store use the same ADDRriU6_1 address.
+let AddedComplexity = 30 in
+def MEMh_ADDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$addend),
+ "memh($addr) += $addend",
+ [(truncstorei16 (add (sextloadi16 ADDRriU6_1:$addr),
+ IntRegs:$addend), ADDRriU6_1:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) -= Rt
+// Matches truncstorei16(sub(sextloadi16 addr, Rt)) where the load and the
+// store use the same ADDRriU6_1 address.
+let AddedComplexity = 30 in
+def MEMh_SUBr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$subend),
+ "memh($addr) -= $subend",
+ [(truncstorei16 (sub (sextloadi16 ADDRriU6_1:$addr),
+ IntRegs:$subend), ADDRriU6_1:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) &= Rt
+// Matches truncstorei16(and(sextloadi16 addr, Rt)) where the load and the
+// store use the same ADDRriU6_1 address.
+let AddedComplexity = 30 in
+def MEMh_ANDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$andend),
+ "memh($addr) &= $andend",
+ [(truncstorei16 (and (sextloadi16 ADDRriU6_1:$addr),
+ IntRegs:$andend), ADDRriU6_1:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) |= Rt
+// Matches truncstorei16(or(sextloadi16 addr, Rt)) where the load and the
+// store use the same ADDRriU6_1 address.
+let AddedComplexity = 30 in
+def MEMh_ORr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$orend),
+ "memh($addr) |= $orend",
+ [(truncstorei16 (or (sextloadi16 ADDRriU6_1:$addr),
+ IntRegs:$orend), ADDRriU6_1:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+
+//===----------------------------------------------------------------------===//
+// MEMOP: Byte
+//
+// Implemented:
+// MEMb_ADDi_indexed_V4 : memb(Rs+#u6:0)+=#U5
+// MEMb_SUBi_indexed_V4 : memb(Rs+#u6:0)-=#U5
+// MEMb_ADDr_indexed_V4 : memb(Rs+#u6:0)+=Rt
+// MEMb_SUBr_indexed_V4 : memb(Rs+#u6:0)-=Rt
+// MEMb_CLRr_indexed_V4 : memb(Rs+#u6:0)&=Rt
+// MEMb_SETr_indexed_V4 : memb(Rs+#u6:0)|=Rt
+// MEMb_ADDi_V4 : memb(Rs+#u6:0)+=#U5
+// MEMb_SUBi_V4 : memb(Rs+#u6:0)-=#U5
+// MEMb_ADDr_V4 : memb(Rs+#u6:0)+=Rt
+// MEMb_SUBr_V4 : memb(Rs+#u6:0)-=Rt
+// MEMb_CLRr_V4 : memb(Rs+#u6:0)&=Rt
+// MEMb_SETr_V4 : memb(Rs+#u6:0)|=Rt
+//
+// Not implemented:
+// MEMb_CLRi_indexed_V4 : memb(Rs+#u6:0)=clrbit(#U5)
+// MEMb_SETi_indexed_V4 : memb(Rs+#u6:0)=setbit(#U5)
+// MEMb_CLRi_V4 : memb(Rs+#u6:0)=clrbit(#U5)
+// MEMb_SETi_V4 : memb(Rs+#u6:0)=setbit(#U5)
+//===----------------------------------------------------------------------===//
+
+
+// MEMb_ADDSUBi_indexed_V4:
+// Pseudo operation for MEMb_ADDi_indexed_V4 and
+// MEMb_SUBi_indexed_V4; a later pass will change it
+// to the corresponding pattern.
+// Matches truncstorei8(add(sextloadi8(base+offset), imm)) back to the same
+// base+offset, with imm constrained by m6ImmPred and the offset by
+// u6_0ImmPred. The assembly string is a placeholder.
+let AddedComplexity = 30 in
+def MEMb_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, m6Imm:$addend),
+ "Error; should not emit",
+ [(truncstorei8 (add (sextloadi8 (add IntRegs:$base,
+ u6_0ImmPred:$offset)),
+ m6ImmPred:$addend),
+ (add IntRegs:$base, u6_0ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) += #U5
+// Explicit base + offset operands with a u5Imm addend. The pattern list is
+// empty, so this record is not selected directly (see the
+// MEMb_ADDSUBi_indexed_MEM_V4 pseudo, which a later pass rewrites).
+let AddedComplexity = 30 in
+def MEMb_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, u5Imm:$addend),
+ "memb($base+#$offset) += $addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) -= #U5
+// Explicit base + offset operands with a u5Imm subtrahend. The pattern list
+// is empty, so this record is not selected directly (see the
+// MEMb_ADDSUBi_indexed_MEM_V4 pseudo, which a later pass rewrites).
+let AddedComplexity = 30 in
+def MEMb_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, u5Imm:$subend),
+ "memb($base+#$offset) -= $subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) += Rt
+// Matches truncstorei8(add(sextloadi8(base+offset), Rt)) back to the same
+// base+offset; the offset must satisfy u6_0ImmPred.
+let AddedComplexity = 30 in
+def MEMb_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$addend),
+ "memb($base+#$offset) += $addend",
+ [(truncstorei8 (add (sextloadi8 (add IntRegs:$base,
+ u6_0ImmPred:$offset)),
+ IntRegs:$addend),
+ (add IntRegs:$base, u6_0ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) -= Rt
+// Matches truncstorei8(sub(sextloadi8(base+offset), Rt)) back to the same
+// base+offset; the offset must satisfy u6_0ImmPred.
+let AddedComplexity = 30 in
+def MEMb_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$subend),
+ "memb($base+#$offset) -= $subend",
+ [(truncstorei8 (sub (sextloadi8 (add IntRegs:$base,
+ u6_0ImmPred:$offset)),
+ IntRegs:$subend),
+ (add IntRegs:$base, u6_0ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) &= Rt
+// Byte AND memop with explicit base register and offset. Matches
+// truncstorei8(and(sextloadi8(base+offset), Rt)) back to the same
+// base+offset. The assembly string must use "&=" so that the emitted text
+// agrees with the (and ...) pattern; "+=" would print an add memop instead.
+let AddedComplexity = 30 in
+def MEMb_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$andend),
+ "memb($base+#$offset) &= $andend",
+ [(truncstorei8 (and (sextloadi8 (add IntRegs:$base,
+ u6_0ImmPred:$offset)),
+ IntRegs:$andend),
+ (add IntRegs:$base, u6_0ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) |= Rt
+// Matches truncstorei8(or(sextloadi8(base+offset), Rt)) back to the same
+// base+offset; the offset must satisfy u6_0ImmPred.
+let AddedComplexity = 30 in
+def MEMb_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$orend),
+ "memb($base+#$offset) |= $orend",
+ [(truncstorei8 (or (sextloadi8 (add IntRegs:$base,
+ u6_0ImmPred:$offset)),
+ IntRegs:$orend),
+ (add IntRegs:$base, u6_0ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// MEMb_ADDSUBi_V4:
+// Pseudo operation for MEMb_ADDi_V4 and MEMb_SUBi_V4;
+// a later pass will change it to the right pattern.
+// Matches truncstorei8(add(sextloadi8 addr, imm)) back to the same
+// ADDRriU6_0 address, with imm constrained by m6ImmPred. The assembly
+// string is a placeholder.
+let AddedComplexity = 30 in
+def MEMb_ADDSUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, m6Imm:$addend),
+ "Error; should not emit",
+ [(truncstorei8 (add (sextloadi8 ADDRriU6_0:$addr),
+ m6ImmPred:$addend), ADDRriU6_0:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) += #U5
+// Address is a single MEMri operand; the addend is a u5Imm. The pattern list
+// is empty, so this record is not selected directly (see the
+// MEMb_ADDSUBi_MEM_V4 pseudo, which a later pass rewrites).
+let AddedComplexity = 30 in
+def MEMb_ADDi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$addend),
+ "memb($addr) += $addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) -= #U5
+// Address is a single MEMri operand; the subtrahend is a u5Imm. The pattern
+// list is empty, so this record is not selected directly (see the
+// MEMb_ADDSUBi_MEM_V4 pseudo, which a later pass rewrites).
+let AddedComplexity = 30 in
+def MEMb_SUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$subend),
+ "memb($addr) -= $subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) += Rt
+// Matches truncstorei8(add(sextloadi8 addr, Rt)) where the load and the
+// store use the same ADDRriU6_0 address.
+let AddedComplexity = 30 in
+def MEMb_ADDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$addend),
+ "memb($addr) += $addend",
+ [(truncstorei8 (add (sextloadi8 ADDRriU6_0:$addr),
+ IntRegs:$addend), ADDRriU6_0:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) -= Rt
+// Matches truncstorei8(sub(sextloadi8 addr, Rt)) where the load and the
+// store use the same ADDRriU6_0 address.
+let AddedComplexity = 30 in
+def MEMb_SUBr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$subend),
+ "memb($addr) -= $subend",
+ [(truncstorei8 (sub (sextloadi8 ADDRriU6_0:$addr),
+ IntRegs:$subend), ADDRriU6_0:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) &= Rt
+// Matches truncstorei8(and(sextloadi8 addr, Rt)) where the load and the
+// store use the same ADDRriU6_0 address.
+let AddedComplexity = 30 in
+def MEMb_ANDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$andend),
+ "memb($addr) &= $andend",
+ [(truncstorei8 (and (sextloadi8 ADDRriU6_0:$addr),
+ IntRegs:$andend), ADDRriU6_0:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) |= Rt
+// Matches truncstorei8(or(sextloadi8 addr, Rt)) where the load and the
+// store use the same ADDRriU6_0 address.
+let AddedComplexity = 30 in
+def MEMb_ORr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$orend),
+ "memb($addr) |= $orend",
+ [(truncstorei8 (or (sextloadi8 ADDRriU6_0:$addr),
+ IntRegs:$orend), ADDRriU6_0:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/PRED +
+//===----------------------------------------------------------------------===//
+
+// Hexagon V4 only supports these flavors of byte/half compare instructions:
+// EQ/GT/GTU. Other flavors like GE/GEU/LT/LTU/LE/LEU are not supported by
+// hardware. However, the compiler can still implement them by combining the
+// patterns that are currently implemented.
+// The implemented patterns are: EQ/GT/GTU.
+// Missing patterns are: GE/GEU/LT/LTU/LE/LEU.
+
+// Pd=cmpb.eq(Rs,#u8)
+// Matches seteq of ($src1 & 255) against an immediate accepted by
+// u8ImmPred, producing a predicate register.
+let isCompare = 1 in
+def CMPbEQri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, u8Imm:$src2),
+ "$dst = cmpb.eq($src1, #$src2)",
+ [(set PredRegs:$dst, (seteq (and IntRegs:$src1, 255),
+ u8ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.eq(Rs,Rt)
+// Xor-and-mask form: matches seteq((Rs ^ Rt) & 255, 0), i.e. the low bytes
+// of the two registers are equal.
+let isCompare = 1 in
+def CMPbEQrr_ubub_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmpb.eq($src1, $src2)",
+ [(set PredRegs:$dst, (seteq (and (xor IntRegs:$src1,
+ IntRegs:$src2),
+ 255),
+ 0))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.eq(Rs,Rt)
+// Shift form: matches seteq(Rs << 24, Rt << 24), which compares only the
+// low byte of each register.
+let isCompare = 1 in
+def CMPbEQrr_sbsb_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmpb.eq($src1, $src2)",
+ [(set PredRegs:$dst, (seteq (shl IntRegs:$src1, (i32 24)),
+ (shl IntRegs:$src2, (i32 24))))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.gt(Rs,#s8)
+// Matches setgt(Rs << 24, imm) where imm satisfies s32_24ImmPred.
+// NOTE(review): the operand is declared s32Imm although the instruction
+// takes #s8; presumably the matched immediate is the value already shifted
+// left by 24 -- confirm that it is converted before being printed.
+let isCompare = 1 in
+def CMPbGTri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, s32Imm:$src2),
+ "$dst = cmpb.gt($src1, #$src2)",
+ [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 24)),
+ s32_24ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.gt(Rs,Rt)
+// Matches the signed comparison setgt(Rs << 24, Rt << 24), so only the low
+// byte of each register takes part.
+let isCompare = 1 in
+def CMPbGTrr_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmpb.gt($src1, $src2)",
+ [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 24)),
+ (shl IntRegs:$src2, (i32 24))))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.gtu(Rs,#u7)
+// Matches the unsigned comparison setugt(Rs & 255, imm) where imm satisfies
+// u7ImmPred.
+let isCompare = 1 in
+def CMPbGTUri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, u7Imm:$src2),
+ "$dst = cmpb.gtu($src1, #$src2)",
+ [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 255),
+ u7ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.gtu(Rs,Rt)
+// Matches the unsigned comparison setugt(Rs & 255, Rt & 255).
+let isCompare = 1 in
+def CMPbGTUrr_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmpb.gtu($src1, $src2)",
+ [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 255),
+ (and IntRegs:$src2, 255)))]>,
+ Requires<[HasV4T]>;
+
+// Signed half compare(.eq) ri.
+// Pd=cmph.eq(Rs,#s8)
+// Matches seteq(Rs & 65535, imm) where imm satisfies u16_s8ImmPred.
+// NOTE(review): the operand is declared u16Imm while the instruction takes
+// #s8; presumably u16_s8ImmPred accepts 16-bit encodings of s8 values --
+// confirm against its definition and how the operand is printed.
+let isCompare = 1 in
+def CMPhEQri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, u16Imm:$src2),
+ "$dst = cmph.eq($src1, #$src2)",
+ [(set PredRegs:$dst, (seteq (and IntRegs:$src1, 65535),
+ u16_s8ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// Signed half compare(.eq) rr.
+// Case 1: xor + and, then compare:
+// r0=xor(r0,r1)
+// r0=and(r0,#0xffff)
+// p0=cmp.eq(r0,#0)
+// Pd=cmph.eq(Rs,Rt)
+// The pattern below is that sequence expressed as
+// seteq((Rs ^ Rt) & 65535, 0).
+let isCompare = 1 in
+def CMPhEQrr_xor_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmph.eq($src1, $src2)",
+ [(set PredRegs:$dst, (seteq (and (xor IntRegs:$src1,
+ IntRegs:$src2),
+ 65535),
+ 0))]>,
+ Requires<[HasV4T]>;
+
+// Signed half compare(.eq) rr.
+// Case 2: shift left 16 bits then compare:
+// r0=asl(r0,16)
+// r1=asl(r1,16)
+// p0=cmp.eq(r0,r1)
+// Pd=cmph.eq(Rs,Rt)
+// The pattern below is that sequence expressed as
+// seteq(Rs << 16, Rt << 16).
+let isCompare = 1 in
+def CMPhEQrr_shl_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmph.eq($src1, $src2)",
+ [(set PredRegs:$dst, (seteq (shl IntRegs:$src1, (i32 16)),
+ (shl IntRegs:$src2, (i32 16))))]>,
+ Requires<[HasV4T]>;
+
+// Signed half compare(.gt) ri.
+// Pd=cmph.gt(Rs,#s8)
+// Matches setgt(Rs << 16, imm) where imm satisfies s32_16s8ImmPred.
+// NOTE(review): the operand is declared s32Imm although the instruction
+// takes #s8; presumably the matched immediate is the value already shifted
+// left by 16 -- confirm that it is converted before being printed.
+let isCompare = 1 in
+def CMPhGTri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, s32Imm:$src2),
+ "$dst = cmph.gt($src1, #$src2)",
+ [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 16)),
+ s32_16s8ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// Signed half compare(.gt) rr.
+// Pd=cmph.gt(Rs,Rt)
+// Matches setgt(Rs << 16, Rt << 16), so only the low halfword of each
+// register takes part in the signed comparison.
+let isCompare = 1 in
+def CMPhGTrr_shl_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmph.gt($src1, $src2)",
+ [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 16)),
+ (shl IntRegs:$src2, (i32 16))))]>,
+ Requires<[HasV4T]>;
+
+// Unsigned half compare rr (.gtu).
+// Pd=cmph.gtu(Rs,Rt)
+// Matches setugt(Rs & 65535, Rt & 65535).
+let isCompare = 1 in
+def CMPhGTUrr_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmph.gtu($src1, $src2)",
+ [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 65535),
+ (and IntRegs:$src2, 65535)))]>,
+ Requires<[HasV4T]>;
+
+// Unsigned half compare ri (.gtu).
+// Pd=cmph.gtu(Rs,#u7)
+// Matches setugt(Rs & 65535, imm) where imm satisfies u7ImmPred.
+let isCompare = 1 in
+def CMPhGTUri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, u7Imm:$src2),
+ "$dst = cmph.gtu($src1, #$src2)",
+ [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 65535),
+ u7ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+//===----------------------------------------------------------------------===//
+// XTYPE/PRED -
+//===----------------------------------------------------------------------===//
+
+// Deallocate frame and return.
+// dealloc_return
+// Unconditional form: marked as a return, terminator, barrier and
+// predicable; declared to define R29, R30, R31 and PC and to use R29 and
+// R31. It has no selection pattern, and the $amt1 operand does not appear
+// in the assembly string.
+let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicable = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ def DEALLOC_RET_V4 : NVInst_V4<(outs), (ins i32imm:$amt1),
+ "dealloc_return",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (Ps) dealloc_return
+// Predicated-true form. Unlike DEALLOC_RET_V4, the enclosing let does not
+// set isBarrier or isPredicable. No selection pattern; $amt1 is not printed.
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ def DEALLOC_RET_cPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, i32imm:$amt1),
+ "if ($src1) dealloc_return",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (!Ps) dealloc_return
+// Predicated-false form. The enclosing let does not set isBarrier or
+// isPredicable. No selection pattern; $amt1 is not printed.
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ def DEALLOC_RET_cNotPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if (!$src1) dealloc_return",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (Ps.new) dealloc_return:nt
+// Predicated on the ".new" value of $src1 with the ":nt" suffix. The
+// enclosing let does not set isBarrier or isPredicable. No selection
+// pattern; $amt1 is not printed.
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ def DEALLOC_RET_cdnPnt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if ($src1.new) dealloc_return:nt",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (!Ps.new) dealloc_return:nt
+// Predicated on the negated ".new" value of $src1 with the ":nt" suffix.
+// The enclosing let does not set isBarrier or isPredicable. No selection
+// pattern; $amt1 is not printed.
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ def DEALLOC_RET_cNotdnPnt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if (!$src1.new) dealloc_return:nt",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (Ps.new) dealloc_return:t
+// Predicated on the ".new" value of $src1 with the ":t" suffix. The
+// enclosing let does not set isBarrier or isPredicable. No selection
+// pattern; $amt1 is not printed.
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ def DEALLOC_RET_cdnPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if ($src1.new) dealloc_return:t",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (!Ps.new) dealloc_return:t
+// Predicated on the negated ".new" value of $src1 with the ":t" suffix (the
+// assembly string below prints ":t", not ":nt"). The enclosing let does not
+// set isBarrier or isPredicable. No selection pattern; $amt1 is not printed.
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ def DEALLOC_RET_cNotdnPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if (!$src1.new) dealloc_return:t",
+ []>,
+ Requires<[HasV4T]>;
+}
diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td
new file mode 100644
index 0000000..1328eba
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -0,0 +1,3462 @@
+//===- HexagonIntrinsics.td - Instruction intrinsics -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is populated based on the following specs:
+// Hexagon V2 Architecture
+// Application-Level Specification
+// 80-V9418-8 Rev. B
+// March 4, 2008
+//===----------------------------------------------------------------------===//
+
+//
+// ALU 32 types.
+//
+
+// Intrinsic wrapper: PredRegs result from two IntRegs operands.
+// Assembly string: "$dst = <opc>($src1, $src2)".
+class qi_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Intrinsic wrapper: PredRegs result from an IntRegs operand and an s10Imm
+// immediate. Assembly string: "$dst = <opc>($src1, #$src2)".
+class qi_ALU32_sis10<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+// Intrinsic wrapper: PredRegs result from an IntRegs operand and an s8Imm
+// immediate. Assembly string: "$dst = <opc>($src1, #$src2)".
+class qi_ALU32_sis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+// Intrinsic wrapper: PredRegs result from an IntRegs operand and a u8Imm
+// immediate. Assembly string: "$dst = <opc>($src1, #$src2)".
+class qi_ALU32_siu8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+// Intrinsic wrapper: PredRegs result from an IntRegs operand and a u9Imm
+// immediate. Assembly string: "$dst = <opc>($src1, #$src2)".
+class qi_ALU32_siu9<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u9Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+// Intrinsic wrapper: IntRegs result from three IntRegs operands.
+// Assembly string: "$dst = <opc>($src1, $src2, $src3)".
+class si_ALU32_qisisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+// Intrinsic wrapper: IntRegs result from (IntRegs, s8Imm, IntRegs).
+// Assembly string: "$dst = <opc>($src1, #$src2, $src3)".
+class si_ALU32_qis8si<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2,
+ IntRegs:$src3))]>;
+
+// Intrinsic wrapper: IntRegs result from (IntRegs, IntRegs, s8Imm).
+// Assembly string: "$dst = <opc>($src1, $src2, #$src3)".
+class si_ALU32_qisis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ s8Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ imm:$src3))]>;
+
+// Intrinsic wrapper: IntRegs result from (IntRegs, s8Imm, s8Imm).
+// Assembly string: "$dst = <opc>($src1, #$src2, #$src3)".
+class si_ALU32_qis8s8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2, s8Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, imm:$src3))]>;
+
+// Intrinsic wrapper: IntRegs result from two IntRegs operands.
+// Assembly string: "$dst = <opc>($src1, $src2)".
+class si_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Same operands as si_ALU32_sisi, with ":sat" appended to the assembly
+// string: "$dst = <opc>($src1, $src2):sat".
+class si_ALU32_sisi_sat<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Same operands as si_ALU32_sisi, with ":rnd" appended to the assembly
+// string: "$dst = <opc>($src1, $src2):rnd".
+class si_ALU32_sisi_rnd<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Intrinsic wrapper: IntRegs result from an IntRegs operand and an s16Imm
+// immediate. Assembly string: "$dst = <opc>($src1, #$src2)".
+class si_ALU32_sis16<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s16Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+// Intrinsic wrapper: IntRegs result from an IntRegs operand and an s10Imm
+// immediate. Assembly string: "$dst = <opc>($src1, #$src2)".
+class si_ALU32_sis10<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+// Intrinsic wrapper with the immediate first: IntRegs result from an s10Imm
+// immediate and an IntRegs operand. Assembly: "$dst = <opc>(#$src1, $src2)".
+class si_ALU32_s10si<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins s10Imm:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "(#$src1, $src2)")),
+ [(set IntRegs:$dst, (IntID imm:$src1, IntRegs:$src2))]>;
+
+// Writes the ".l" half of $dst: assembly string "$dst.l = <opc>#$src2".
+// $src1 is an input to the intrinsic pattern but is not printed.
+class si_lo_ALU32_siu16<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u16Imm:$src2),
+ !strconcat("$dst.l = ", !strconcat(opc , "#$src2")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+// Writes the ".h" half of $dst: assembly string "$dst.h = <opc>#$src2".
+// $src1 is an input to the intrinsic pattern but is not printed.
+class si_hi_ALU32_siu16<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u16Imm:$src2),
+ !strconcat("$dst.h = ", !strconcat(opc , "#$src2")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+// Intrinsic wrapper: IntRegs result from a single s16Imm immediate.
+// Assembly string: "$dst = <opc>#$src1" (no parentheses).
+class si_ALU32_s16<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins s16Imm:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "#$src1")),
+ [(set IntRegs:$dst, (IntID imm:$src1))]>;
+
+// Intrinsic wrapper: DoubleRegs result from a single s8Imm immediate.
+// Assembly string: "$dst = <opc>#$src1" (no parentheses).
+class di_ALU32_s8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs DoubleRegs:$dst), (ins s8Imm:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "#$src1")),
+ [(set DoubleRegs:$dst, (IntID imm:$src1))]>;
+
+// Intrinsic wrapper: DoubleRegs result from one DoubleRegs operand, derived
+// from ALU64_rr. Assembly string: "$dst = <opc>$src" (no parentheses).
+class di_ALU64_di<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "$src")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+// Intrinsic wrapper: IntRegs result from one IntRegs operand.
+// Assembly string: "$dst = <opc>($src)".
+class si_ALU32_si<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+// Like si_ALU32_si but the operand is printed without parentheses:
+// "$dst = <opc>$src".
+class si_ALU32_si_tfr<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "$src")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+//
+// ALU 64 types.
+//
+
+// Intrinsic wrapper: IntRegs result from one IntRegs operand, with ":sat".
+// Assembly string: "$dst = <opc>($src):sat".
+class si_ALU64_si_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+// Intrinsic wrapper: IntRegs result from two DoubleRegs operands.
+// Assembly string: "$dst = <opc>($src1, $src2)".
+class si_ALU64_didi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+// Intrinsic wrapper: DoubleRegs result from (IntRegs, DoubleRegs).
+// Assembly string: "$dst = <opc>($src1, $src2)".
+class di_ALU64_sidi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2))]>;
+
+// Intrinsic wrapper: DoubleRegs result from two DoubleRegs operands.
+// Assembly string: "$dst = <opc>($src1, $src2)".
+class di_ALU64_didi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+// Intrinsic wrapper: DoubleRegs result from (IntRegs, DoubleRegs,
+// DoubleRegs). Assembly string: "$dst = <opc>($src1, $src2, $src3)".
+class di_ALU64_qididi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2,
+ DoubleRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2,
+ DoubleRegs:$src3))]>;
+
+// Intrinsic wrapper: DoubleRegs result from two IntRegs operands.
+// Assembly string: "$dst = <opc>($src1, $src2)".
+class di_ALU64_sisi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Same operands as di_ALU64_didi, with ":sat" appended:
+// "$dst = <opc>($src1, $src2):sat".
+class di_ALU64_didi_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+// Same operands as di_ALU64_didi, with ":rnd" appended:
+// "$dst = <opc>($src1, $src2):rnd".
+class di_ALU64_didi_rnd<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+// Same operands as di_ALU64_didi, with ":crnd" appended:
+// "$dst = <opc>($src1, $src2):crnd".
+class di_ALU64_didi_crnd<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):crnd")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+// Same operands as di_ALU64_didi, with ":rnd:sat" appended:
+// "$dst = <opc>($src1, $src2):rnd:sat".
+class di_ALU64_didi_rnd_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+// Same operands as di_ALU64_didi, with ":crnd:sat" appended:
+// "$dst = <opc>($src1, $src2):crnd:sat".
+class di_ALU64_didi_crnd_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):crnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+// Intrinsic wrapper: PredRegs result from two DoubleRegs operands.
+// Assembly string: "$dst = <opc>($src1, $src2)".
+class qi_ALU64_didi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+// Intrinsic wrapper: IntRegs result from two IntRegs operands (ALU64_rr
+// base). Assembly string: "$dst = <opc>($src1, $src2)".
+class si_ALU64_sisi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// IntRegs result from two IntRegs operands, printed with half selectors and
+// ":sat": "$dst = <opc>($src1.L, $src2.H):sat". The assembly string is the
+// same as that of si_ALU64_sisi_l16_sat_lh.
+class si_ALU64_sisi_sat_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// l16 family, saturating, operands printed as ($src1.H, $src2.H):
+// "$dst = <opc>($src1.H, $src2.H):sat".
+class si_ALU64_sisi_l16_sat_hh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// l16 family, saturating, operands printed as ($src1.L, $src2.H):
+// "$dst = <opc>($src1.L, $src2.H):sat".
+class si_ALU64_sisi_l16_sat_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// l16 family, saturating, operands printed as ($src1.H, $src2.L):
+// "$dst = <opc>($src1.H, $src2.L):sat".
+class si_ALU64_sisi_l16_sat_hl<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// l16 family, saturating, operands printed as ($src1.L, $src2.L):
+// "$dst = <opc>($src1.L, $src2.L):sat".
+class si_ALU64_sisi_l16_sat_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// l16 family, operands printed as ($src1.H, $src2.H) with no suffix:
+// "$dst = <opc>($src1.H, $src2.H)".
+class si_ALU64_sisi_l16_hh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// l16 family, operands printed as ($src1.H, $src2.L) with no suffix:
+// "$dst = <opc>($src1.H, $src2.L)".
+class si_ALU64_sisi_l16_hl<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// l16 family, operands printed as ($src1.L, $src2.H) with no suffix:
+// "$dst = <opc>($src1.L, $src2.H)".
+class si_ALU64_sisi_l16_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// l16 family, operands printed as ($src1.L, $src2.L) with no suffix:
+// "$dst = <opc>($src1.L, $src2.L)".
+class si_ALU64_sisi_l16_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// h16 family, saturating, operands printed as ($src1.H, $src2.H):
+// "$dst = <opc>($src1.H, $src2.H):sat:<<16".
+class si_ALU64_sisi_h16_sat_hh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):sat:<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// h16 family, saturating, operands printed as ($src1.L, $src2.H):
+// "$dst = <opc>($src1.L, $src2.H):sat:<<16".
+class si_ALU64_sisi_h16_sat_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.H):sat:<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// h16 family, saturating, operands printed as ($src1.H, $src2.L):
+// "$dst = <opc>($src1.H, $src2.L):sat:<<16".
+class si_ALU64_sisi_h16_sat_hl<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.L):sat:<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// h16 family, saturating, operands printed as ($src1.L, $src2.L):
+// "$dst = <opc>($src1.L, $src2.L):sat:<<16".
+class si_ALU64_sisi_h16_sat_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.L):sat:<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// h16 family, operands printed as ($src1.H, $src2.H):
+// "$dst = <opc>($src1.H, $src2.H):<<16".
+class si_ALU64_sisi_h16_hh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// h16 family, operands printed as ($src1.H, $src2.L):
+// "$dst = <opc>($src1.H, $src2.L):<<16".
+class si_ALU64_sisi_h16_hl<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// h16 family, operands printed as ($src1.L, $src2.H):
+// "$dst = <opc>($src1.L, $src2.H):<<16".
+class si_ALU64_sisi_h16_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1.L, $src2.L):<<16"; selects IntID(IntRegs, IntRegs)
+// into an IntRegs result.
+class si_ALU64_sisi_h16_ll<string opc, Intrinsic IntID>
+  : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1.L, $src2.L):<<16"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1.L, $src2.H)"; selects IntID(IntRegs, IntRegs) into
+// an IntRegs result.
+class si_ALU64_sisi_lh<string opc, Intrinsic IntID>
+  : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1.L, $src2.H)"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1.L, $src2.L)"; selects IntID(IntRegs, IntRegs) into
+// an IntRegs result.
+class si_ALU64_sisi_ll<string opc, Intrinsic IntID>
+  : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1.L, $src2.L)"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1, $src2):sat"; selects IntID(IntRegs, IntRegs) into
+// an IntRegs result.
+class si_ALU64_sisi_sat<string opc, Intrinsic IntID>
+  : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2):sat"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+//
+// SInst classes.
+//
+
+// Asm "$dst = opc($src)"; selects IntID(IntRegs) into a PredRegs result.
+class qi_SInst_qi<string opc, Intrinsic IntID>
+  : SInst<(outs PredRegs:$dst), (ins IntRegs:$src),
+      !strconcat("$dst = ", opc, "($src)"),
+      [(set PredRegs:$dst, (IntID IntRegs:$src))]>;
+
+// Asm "$dst = opc$src" (operand printed without parentheses); selects
+// IntID(IntRegs) into a PredRegs result.
+class qi_SInst_qi_pxfer<string opc, Intrinsic IntID>
+  : SInst<(outs PredRegs:$dst), (ins IntRegs:$src),
+      !strconcat("$dst = ", opc, "$src"),
+      [(set PredRegs:$dst, (IntID IntRegs:$src))]>;
+
+// Asm "$dst = opc($src1, $src2)"; selects IntID(IntRegs, IntRegs) into a
+// PredRegs result.
+class qi_SInst_qiqi<string opc, Intrinsic IntID>
+  : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2)"),
+      [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1, !$src2)" (second operand printed negated); selects
+// IntID(IntRegs, IntRegs) into a PredRegs result.
+class qi_SInst_qiqi_neg<string opc, Intrinsic IntID>
+  : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, !$src2)"),
+      [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src)"; selects IntID(DoubleRegs) into a DoubleRegs result.
+class di_SInst_di<string opc, Intrinsic IntID>
+  : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+      !strconcat("$dst = ", opc, "($src)"),
+      [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+// Asm "$dst = opc($src):sat"; selects IntID(DoubleRegs) into a DoubleRegs
+// result.
+class di_SInst_di_sat<string opc, Intrinsic IntID>
+  : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+      !strconcat("$dst = ", opc, "($src):sat"),
+      [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+// Asm "$dst = opc($src)"; selects IntID(DoubleRegs) into an IntRegs result.
+class si_SInst_di<string opc, Intrinsic IntID>
+  : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src),
+      !strconcat("$dst = ", opc, "($src)"),
+      [(set IntRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+// Asm "$dst = opc($src):sat"; selects IntID(DoubleRegs) into an IntRegs
+// result.
+class si_SInst_di_sat<string opc, Intrinsic IntID>
+  : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src),
+      !strconcat("$dst = ", opc, "($src):sat"),
+      [(set IntRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+// Asm "$dst = opc($src1, $src2)"; selects IntID(DoubleRegs, IntRegs) into a
+// DoubleRegs result.
+class di_SInst_disi<string opc, Intrinsic IntID>
+  : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2)"),
+      [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1, $src2)"; selects IntID(DoubleRegs, DoubleRegs) into
+// a DoubleRegs result.
+class di_SInst_didi<string opc, Intrinsic IntID>
+  : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2)"),
+      [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+// Asm "$dst = opc($src1)"; selects IntID(IntRegs) into a DoubleRegs result.
+class di_SInst_si<string opc, Intrinsic IntID>
+  : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
+      !strconcat("$dst = ", opc, "($src1)"),
+      [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>;
+
+// Asm "$dst = opc($src1, $src2, #$src3)"; selects IntID(IntRegs, IntRegs, imm)
+// into an IntRegs result. $src3 is declared as a u3Imm operand.
+class si_SInst_sisiu3<string opc, Intrinsic IntID>
+  : SInst<(outs IntRegs:$dst),
+      (ins IntRegs:$src1, IntRegs:$src2, u3Imm:$src3),
+      !strconcat("$dst = ", opc, "($src1, $src2, #$src3)"),
+      [(set IntRegs:$dst,
+            (IntID IntRegs:$src1, IntRegs:$src2, imm:$src3))]>;
+
+// Asm "$dst = opc($src1, #$src2)"; selects IntID(DoubleRegs, imm) into an
+// IntRegs result. $src2 is declared as a u5Imm operand.
+class si_SInst_diu5<string opc, Intrinsic IntID>
+  : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u5Imm:$src2),
+      !strconcat("$dst = ", opc, "($src1, #$src2)"),
+      [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
+
+// Asm "$dst = opc($src1, $src2)"; selects IntID(DoubleRegs, IntRegs) into an
+// IntRegs result.
+class si_SInst_disi<string opc, Intrinsic IntID>
+  : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2)"),
+      [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1, $src2)"; selects IntID(IntRegs, DoubleRegs) into an
+// IntRegs result.
+class si_SInst_sidi<string opc, Intrinsic IntID>
+  : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2)"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2))]>;
+
+// Asm "$dst = opc($src1, $src2, $src3)"; selects
+// IntID(DoubleRegs, IntRegs, IntRegs) into a DoubleRegs result.
+class di_SInst_disisi<string opc, Intrinsic IntID>
+  : SInst<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+      !strconcat("$dst = ", opc, "($src1, $src2, $src3)"),
+      [(set DoubleRegs:$dst,
+            (IntID DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3))]>;
+
+// Asm "$dst = opc($src1, $src2)"; selects IntID(IntRegs, IntRegs) into a
+// DoubleRegs result.
+class di_SInst_sisi<string opc, Intrinsic IntID>
+  : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2)"),
+      [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1, #$src2)"; selects IntID(IntRegs, imm) into a
+// PredRegs result. $src2 is declared as a u5Imm operand.
+class qi_SInst_siu5<string opc, Intrinsic IntID>
+  : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+      !strconcat("$dst = ", opc, "($src1, #$src2)"),
+      [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+// Asm "$dst = opc($src1, #$src2)"; selects IntID(IntRegs, imm) into a
+// PredRegs result. $src2 is declared as a u6Imm operand.
+class qi_SInst_siu6<string opc, Intrinsic IntID>
+  : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u6Imm:$src2),
+      !strconcat("$dst = ", opc, "($src1, #$src2)"),
+      [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+// Asm "$dst = opc($src1, $src2)"; selects IntID(IntRegs, IntRegs) into a
+// PredRegs result.
+class qi_SInst_sisi<string opc, Intrinsic IntID>
+  : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2)"),
+      [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src)"; selects IntID(IntRegs) into an IntRegs result.
+class si_SInst_si<string opc, Intrinsic IntID>
+  : SInst<(outs IntRegs:$dst), (ins IntRegs:$src),
+      !strconcat("$dst = ", opc, "($src)"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+// Asm "$dst = opc($src):sat"; selects IntID(IntRegs) into an IntRegs result.
+class si_SInst_si_sat<string opc, Intrinsic IntID>
+  : SInst<(outs IntRegs:$dst), (ins IntRegs:$src),
+      !strconcat("$dst = ", opc, "($src):sat"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+// Asm "$dst = opc($src)"; selects IntID(IntRegs) into a DoubleRegs result.
+class di_SInst_qi<string opc, Intrinsic IntID>
+  : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src),
+      !strconcat("$dst = ", opc, "($src)"),
+      [(set DoubleRegs:$dst, (IntID IntRegs:$src))]>;
+
+// Asm "$dst = opc$src" (operand printed without parentheses); selects
+// IntID(IntRegs) into an IntRegs result.
+class si_SInst_qi<string opc, Intrinsic IntID>
+  : SInst<(outs IntRegs:$dst), (ins IntRegs:$src),
+      !strconcat("$dst = ", opc, "$src"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+// Asm "$dst = opc($src1, $src2)"; selects IntID(IntRegs, IntRegs) into an
+// IntRegs result.
+class si_SInst_qiqi<string opc, Intrinsic IntID>
+  : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2)"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc$src" (operand printed without parentheses); selects
+// IntID(IntRegs) into a PredRegs result.
+class qi_SInst_si<string opc, Intrinsic IntID>
+  : SInst<(outs PredRegs:$dst), (ins IntRegs:$src),
+      !strconcat("$dst = ", opc, "$src"),
+      [(set PredRegs:$dst, (IntID IntRegs:$src))]>;
+
+// Asm "$dst = opc($src1, $src2)"; selects IntID(IntRegs, IntRegs) into an
+// IntRegs result.
+class si_SInst_sisi<string opc, Intrinsic IntID>
+  : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2)"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1, #$src2)"; selects IntID(DoubleRegs, imm) into a
+// DoubleRegs result. $src2 is declared as a u6Imm operand.
+class di_SInst_diu6<string opc, Intrinsic IntID>
+  : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+      !strconcat("$dst = ", opc, "($src1, #$src2)"),
+      [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
+
+// Asm "$dst = opc($src1, #$src2)"; selects IntID(IntRegs, imm) into an
+// IntRegs result. $src2 is declared as a u5Imm operand.
+class si_SInst_siu5<string opc, Intrinsic IntID>
+  : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+      !strconcat("$dst = ", opc, "($src1, #$src2)"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+// Asm "$dst = opc($src1, #$src2):rnd"; selects IntID(IntRegs, imm) into an
+// IntRegs result. $src2 is declared as a u5Imm operand.
+class si_SInst_siu5_rnd<string opc, Intrinsic IntID>
+  : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+      !strconcat("$dst = ", opc, "($src1, #$src2):rnd"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+// Asm "$dst = opc($src1, #$src2, #$src3)"; selects IntID(IntRegs, imm, imm)
+// into an IntRegs result. Both immediates are declared as u5Imm operands.
+class si_SInst_siu5u5<string opc, Intrinsic IntID>
+  : SInst<(outs IntRegs:$dst),
+      (ins IntRegs:$src1, u5Imm:$src2, u5Imm:$src3),
+      !strconcat("$dst = ", opc, "($src1, #$src2, #$src3)"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, imm:$src3))]>;
+
+// Asm "$dst += opc($src1, $src2)"; selects IntID(dst2, src1, src2), all
+// IntRegs, into IntRegs. "$dst2 = $dst" is forwarded as the last SInst_acc
+// argument (presumably the constraint tying $dst2 to $dst).
+class si_SInst_sisisi_acc<string opc, Intrinsic IntID>
+  : SInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst += ", opc, "($src1, $src2)"),
+      [(set IntRegs:$dst,
+            (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+      "$dst2 = $dst">;
+
+// Asm "$dst -= opc($src1, $src2)"; selects IntID(dst2, src1, src2), all
+// IntRegs, into IntRegs. "$dst2 = $dst" is forwarded as the last SInst_acc
+// argument (presumably the constraint tying $dst2 to $dst).
+class si_SInst_sisisi_nac<string opc, Intrinsic IntID>
+  : SInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst -= ", opc, "($src1, $src2)"),
+      [(set IntRegs:$dst,
+            (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+      "$dst2 = $dst">;
+
+// Asm "$dst += opc($src1, $src2)"; selects
+// IntID(DoubleRegs dst2, DoubleRegs src1, IntRegs src2) into DoubleRegs.
+// "$dst2 = $dst" is forwarded as the last SInst_acc argument (presumably the
+// constraint tying $dst2 to $dst).
+class di_SInst_didisi_acc<string opc, Intrinsic IntID>
+  : SInst_acc<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$dst2, DoubleRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst += ", opc, "($src1, $src2)"),
+      [(set DoubleRegs:$dst,
+            (IntID DoubleRegs:$dst2, DoubleRegs:$src1, IntRegs:$src2))],
+      "$dst2 = $dst">;
+
+// Asm "$dst -= opc($src1, $src2)"; selects
+// IntID(DoubleRegs dst2, DoubleRegs src1, IntRegs src2) into DoubleRegs.
+// "$dst2 = $dst" is forwarded as the last SInst_acc argument (presumably the
+// constraint tying $dst2 to $dst).
+class di_SInst_didisi_nac<string opc, Intrinsic IntID>
+  : SInst_acc<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$dst2, DoubleRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst -= ", opc, "($src1, $src2)"),
+      [(set DoubleRegs:$dst,
+            (IntID DoubleRegs:$dst2, DoubleRegs:$src1, IntRegs:$src2))],
+      "$dst2 = $dst">;
+
+// Asm "$dst = opc($src1, #$src2, #$src3)"; selects
+// IntID(IntRegs dst2, IntRegs src1, imm, imm) into IntRegs. Both immediates
+// are u5Imm operands. $dst2 is an input that is not printed; "$dst2 = $dst"
+// is forwarded as the last SInst_acc argument (presumably a tie constraint).
+class si_SInst_sisiu5u5<string opc, Intrinsic IntID>
+  : SInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$dst2, IntRegs:$src1, u5Imm:$src2, u5Imm:$src3),
+      !strconcat("$dst = ", opc, "($src1, #$src2, #$src3)"),
+      [(set IntRegs:$dst,
+            (IntID IntRegs:$dst2, IntRegs:$src1, imm:$src2, imm:$src3))],
+      "$dst2 = $dst">;
+
+// Asm "$dst = opc($src1, $src2)"; selects
+// IntID(IntRegs dst2, IntRegs src1, DoubleRegs src2) into IntRegs. $dst2 is
+// an input that is not printed; "$dst2 = $dst" is forwarded as the last
+// SInst_acc argument (presumably a tie constraint).
+class si_SInst_sisidi<string opc, Intrinsic IntID>
+  : SInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$dst2, IntRegs:$src1, DoubleRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2)"),
+      [(set IntRegs:$dst,
+            (IntID IntRegs:$dst2, IntRegs:$src1, DoubleRegs:$src2))],
+      "$dst2 = $dst">;
+
+// Asm "$dst = opc($src1, #$src2, #$src3)"; selects
+// IntID(DoubleRegs dst2, DoubleRegs src1, imm, imm) into DoubleRegs. Both
+// immediates are u6Imm operands. $dst2 is an input that is not printed;
+// "$dst2 = $dst" is forwarded to SInst_acc (presumably a tie constraint).
+class di_SInst_didiu6u6<string opc, Intrinsic IntID>
+  : SInst_acc<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$dst2, DoubleRegs:$src1, u6Imm:$src2, u6Imm:$src3),
+      !strconcat("$dst = ", opc, "($src1, #$src2, #$src3)"),
+      [(set DoubleRegs:$dst,
+            (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+                   imm:$src2, imm:$src3))],
+      "$dst2 = $dst">;
+
+// Asm "$dst = opc($src1, $src2)"; selects IntID(dst2, src1, src2), all
+// DoubleRegs, into DoubleRegs. $dst2 is an input that is not printed;
+// "$dst2 = $dst" is forwarded to SInst_acc (presumably a tie constraint).
+class di_SInst_dididi<string opc, Intrinsic IntID>
+  : SInst_acc<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$dst2, DoubleRegs:$src1, DoubleRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2)"),
+      [(set DoubleRegs:$dst,
+            (IntID DoubleRegs:$dst2, DoubleRegs:$src1, DoubleRegs:$src2))],
+      "$dst2 = $dst">;
+
+// Asm "$dst = opc($src1, #$src2, #$src3)"; selects IntID(DoubleRegs, imm, imm)
+// into a DoubleRegs result. Both immediates are declared as u6Imm operands.
+class di_SInst_diu6u6<string opc, Intrinsic IntID>
+  : SInst<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$src1, u6Imm:$src2, u6Imm:$src3),
+      !strconcat("$dst = ", opc, "($src1, #$src2, #$src3)"),
+      [(set DoubleRegs:$dst,
+            (IntID DoubleRegs:$src1, imm:$src2, imm:$src3))]>;
+
+// Asm "$dst = opc($src1, $src2, $src3)"; selects
+// IntID(DoubleRegs, DoubleRegs, IntRegs) into a DoubleRegs result.
+class di_SInst_didisi<string opc, Intrinsic IntID>
+  : SInst<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+      !strconcat("$dst = ", opc, "($src1, $src2, $src3)"),
+      [(set DoubleRegs:$dst,
+            (IntID DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3))]>;
+
+// Asm "$dst = opc($src1, $src2, $src3)"; selects
+// IntID(DoubleRegs, DoubleRegs, IntRegs) into a DoubleRegs result. The third
+// operand is declared in IntRegs here, same as di_SInst_didisi.
+class di_SInst_didiqi<string opc, Intrinsic IntID>
+  : SInst<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+      !strconcat("$dst = ", opc, "($src1, $src2, $src3)"),
+      [(set DoubleRegs:$dst,
+            (IntID DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3))]>;
+
+// Asm "$dst = opc($src1, $src2, #$src3)"; selects
+// IntID(DoubleRegs, DoubleRegs, imm) into a DoubleRegs result. $src3 is
+// declared as a u3Imm operand.
+class di_SInst_didiu3<string opc, Intrinsic IntID>
+  : SInst<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$src1, DoubleRegs:$src2, u3Imm:$src3),
+      !strconcat("$dst = ", opc, "($src1, $src2, #$src3)"),
+      [(set DoubleRegs:$dst,
+            (IntID DoubleRegs:$src1, DoubleRegs:$src2, imm:$src3))]>;
+
+// Asm "$dst |= opc($src1, $src2)"; selects
+// IntID(DoubleRegs dst2, DoubleRegs src1, IntRegs src2) into DoubleRegs.
+// "$dst2 = $dst" is forwarded as the last SInst_acc argument (presumably the
+// constraint tying $dst2 to $dst).
+class di_SInst_didisi_or<string opc, Intrinsic IntID>
+  : SInst_acc<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$dst2, DoubleRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst |= ", opc, "($src1, $src2)"),
+      [(set DoubleRegs:$dst,
+            (IntID DoubleRegs:$dst2, DoubleRegs:$src1, IntRegs:$src2))],
+      "$dst2 = $dst">;
+
+// Asm "$dst &= opc($src1, $src2)"; selects
+// IntID(DoubleRegs dst2, DoubleRegs src1, IntRegs src2) into DoubleRegs.
+// "$dst2 = $dst" is forwarded as the last SInst_acc argument (presumably the
+// constraint tying $dst2 to $dst).
+class di_SInst_didisi_and<string opc, Intrinsic IntID>
+  : SInst_acc<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$dst2, DoubleRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst &= ", opc, "($src1, $src2)"),
+      [(set DoubleRegs:$dst,
+            (IntID DoubleRegs:$dst2, DoubleRegs:$src1, IntRegs:$src2))],
+      "$dst2 = $dst">;
+
+// Asm "$dst &= opc($src1, #$src2)"; selects
+// IntID(DoubleRegs dst2, DoubleRegs src1, imm) into DoubleRegs. $src2 is a
+// u6Imm operand. "$dst2 = $dst" is forwarded as the last SInst_acc argument
+// (presumably the constraint tying $dst2 to $dst).
+class di_SInst_didiu6_and<string opc, Intrinsic IntID>
+  : SInst_acc<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$dst2, DoubleRegs:$src1, u6Imm:$src2),
+      !strconcat("$dst &= ", opc, "($src1, #$src2)"),
+      [(set DoubleRegs:$dst,
+            (IntID DoubleRegs:$dst2, DoubleRegs:$src1, imm:$src2))],
+      "$dst2 = $dst">;
+
+// Asm "$dst |= opc($src1, #$src2)"; selects
+// IntID(DoubleRegs dst2, DoubleRegs src1, imm) into DoubleRegs. $src2 is a
+// u6Imm operand. "$dst2 = $dst" is forwarded as the last SInst_acc argument
+// (presumably the constraint tying $dst2 to $dst).
+class di_SInst_didiu6_or<string opc, Intrinsic IntID>
+  : SInst_acc<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$dst2, DoubleRegs:$src1, u6Imm:$src2),
+      !strconcat("$dst |= ", opc, "($src1, #$src2)"),
+      [(set DoubleRegs:$dst,
+            (IntID DoubleRegs:$dst2, DoubleRegs:$src1, imm:$src2))],
+      "$dst2 = $dst">;
+
+// Asm "$dst ^= opc($src1, #$src2)"; selects
+// IntID(DoubleRegs dst2, DoubleRegs src1, imm) into DoubleRegs. $src2 is a
+// u6Imm operand. "$dst2 = $dst" is forwarded as the last SInst_acc argument
+// (presumably the constraint tying $dst2 to $dst).
+class di_SInst_didiu6_xor<string opc, Intrinsic IntID>
+  : SInst_acc<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$dst2, DoubleRegs:$src1, u6Imm:$src2),
+      !strconcat("$dst ^= ", opc, "($src1, #$src2)"),
+      [(set DoubleRegs:$dst,
+            (IntID DoubleRegs:$dst2, DoubleRegs:$src1, imm:$src2))],
+      "$dst2 = $dst">;
+
+// Asm "$dst &= opc($src1, $src2)"; selects IntID(dst2, src1, src2), all
+// IntRegs, into IntRegs. "$dst2 = $dst" is forwarded as the last SInst_acc
+// argument (presumably the constraint tying $dst2 to $dst).
+class si_SInst_sisisi_and<string opc, Intrinsic IntID>
+  : SInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst &= ", opc, "($src1, $src2)"),
+      [(set IntRegs:$dst,
+            (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+      "$dst2 = $dst">;
+
+// Asm "$dst |= opc($src1, $src2)"; selects IntID(dst2, src1, src2), all
+// IntRegs, into IntRegs. "$dst2 = $dst" is forwarded as the last SInst_acc
+// argument (presumably the constraint tying $dst2 to $dst).
+class si_SInst_sisisi_or<string opc, Intrinsic IntID>
+  : SInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst |= ", opc, "($src1, $src2)"),
+      [(set IntRegs:$dst,
+            (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+      "$dst2 = $dst">;
+
+
+// Asm "$dst &= opc($src1, #$src2)"; selects
+// IntID(IntRegs dst2, IntRegs src1, imm) into IntRegs. $src2 is a u5Imm
+// operand. "$dst2 = $dst" is forwarded as the last SInst_acc argument
+// (presumably the constraint tying $dst2 to $dst).
+class si_SInst_sisiu5_and<string opc, Intrinsic IntID>
+  : SInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$dst2, IntRegs:$src1, u5Imm:$src2),
+      !strconcat("$dst &= ", opc, "($src1, #$src2)"),
+      [(set IntRegs:$dst,
+            (IntID IntRegs:$dst2, IntRegs:$src1, imm:$src2))],
+      "$dst2 = $dst">;
+
+// Asm "$dst |= opc($src1, #$src2)"; selects
+// IntID(IntRegs dst2, IntRegs src1, imm) into IntRegs. $src2 is a u5Imm
+// operand. "$dst2 = $dst" is forwarded as the last SInst_acc argument
+// (presumably the constraint tying $dst2 to $dst).
+class si_SInst_sisiu5_or<string opc, Intrinsic IntID>
+  : SInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$dst2, IntRegs:$src1, u5Imm:$src2),
+      !strconcat("$dst |= ", opc, "($src1, #$src2)"),
+      [(set IntRegs:$dst,
+            (IntID IntRegs:$dst2, IntRegs:$src1, imm:$src2))],
+      "$dst2 = $dst">;
+
+// Asm "$dst ^= opc($src1, #$src2)"; selects
+// IntID(IntRegs dst2, IntRegs src1, imm) into IntRegs. $src2 is a u5Imm
+// operand. "$dst2 = $dst" is forwarded as the last SInst_acc argument
+// (presumably the constraint tying $dst2 to $dst).
+class si_SInst_sisiu5_xor<string opc, Intrinsic IntID>
+  : SInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$dst2, IntRegs:$src1, u5Imm:$src2),
+      !strconcat("$dst ^= ", opc, "($src1, #$src2)"),
+      [(set IntRegs:$dst,
+            (IntID IntRegs:$dst2, IntRegs:$src1, imm:$src2))],
+      "$dst2 = $dst">;
+
+// Asm "$dst += opc($src1, #$src2)"; selects
+// IntID(IntRegs dst2, IntRegs src1, imm) into IntRegs. $src2 is a u5Imm
+// operand. "$dst2 = $dst" is forwarded as the last SInst_acc argument
+// (presumably the constraint tying $dst2 to $dst).
+class si_SInst_sisiu5_acc<string opc, Intrinsic IntID>
+  : SInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$dst2, IntRegs:$src1, u5Imm:$src2),
+      !strconcat("$dst += ", opc, "($src1, #$src2)"),
+      [(set IntRegs:$dst,
+            (IntID IntRegs:$dst2, IntRegs:$src1, imm:$src2))],
+      "$dst2 = $dst">;
+
+// Asm "$dst -= opc($src1, #$src2)"; selects
+// IntID(IntRegs dst2, IntRegs src1, imm) into IntRegs. $src2 is a u5Imm
+// operand. "$dst2 = $dst" is forwarded as the last SInst_acc argument
+// (presumably the constraint tying $dst2 to $dst).
+class si_SInst_sisiu5_nac<string opc, Intrinsic IntID>
+  : SInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$dst2, IntRegs:$src1, u5Imm:$src2),
+      !strconcat("$dst -= ", opc, "($src1, #$src2)"),
+      [(set IntRegs:$dst,
+            (IntID IntRegs:$dst2, IntRegs:$src1, imm:$src2))],
+      "$dst2 = $dst">;
+
+// Asm "$dst += opc($src1, #$src2)"; selects
+// IntID(DoubleRegs dst2, DoubleRegs src1, imm) into DoubleRegs.
+// "$dst2 = $dst" is forwarded as the last SInst_acc argument (presumably the
+// constraint tying $dst2 to $dst).
+//
+// The immediate is declared u6Imm, matching the class name and the sibling
+// di_SInst_didiu6_and/_or/_xor classes; it was previously declared u5Imm.
+class di_SInst_didiu6_acc<string opc, Intrinsic IntID>
+  : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+                                       u6Imm:$src2),
+      !strconcat("$dst += ", !strconcat(opc , "($src1, #$src2)")),
+      [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+                                    DoubleRegs:$src1, imm:$src2))],
+      "$dst2 = $dst">;
+
+// Asm "$dst -= opc($src1, #$src2)"; selects
+// IntID(DoubleRegs dst2, DoubleRegs src1, imm) into DoubleRegs.
+// "$dst2 = $dst" is forwarded as the last SInst_acc argument (presumably the
+// constraint tying $dst2 to $dst).
+//
+// The immediate is declared u6Imm, matching the class name and the sibling
+// di_SInst_didiu6_and/_or/_xor classes; it was previously declared u5Imm.
+class di_SInst_didiu6_nac<string opc, Intrinsic IntID>
+  : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+                                       u6Imm:$src2),
+      !strconcat("$dst -= ", !strconcat(opc , "($src1, #$src2)")),
+      [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+                                    imm:$src2))],
+      "$dst2 = $dst">;
+
+
+//
+// MInst classes.
+//
+
+// Asm "$dst = opc($src1.H, $src2.H):<<1:rnd"; selects IntID(IntRegs, IntRegs)
+// into a DoubleRegs result.
+class di_MInst_sisi_rnd_hh_s1<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1.H, $src2.H):<<1:rnd"),
+      [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1.H, $src2.H):rnd"; selects IntID(IntRegs, IntRegs)
+// into a DoubleRegs result.
+class di_MInst_sisi_rnd_hh<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1.H, $src2.H):rnd"),
+      [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1.H, $src2.L):<<1:rnd"; selects IntID(IntRegs, IntRegs)
+// into a DoubleRegs result.
+class di_MInst_sisi_rnd_hl_s1<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1.H, $src2.L):<<1:rnd"),
+      [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1.H, $src2.L):rnd"; selects IntID(IntRegs, IntRegs)
+// into a DoubleRegs result.
+class di_MInst_sisi_rnd_hl<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1.H, $src2.L):rnd"),
+      [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1.L, $src2.H):<<1:rnd"; selects IntID(IntRegs, IntRegs)
+// into a DoubleRegs result.
+class di_MInst_sisi_rnd_lh_s1<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1.L, $src2.H):<<1:rnd"),
+      [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1.L, $src2.H):rnd"; selects IntID(IntRegs, IntRegs)
+// into a DoubleRegs result.
+class di_MInst_sisi_rnd_lh<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1.L, $src2.H):rnd"),
+      [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1.L, $src2.L):<<1:rnd"; selects IntID(IntRegs, IntRegs)
+// into a DoubleRegs result.
+class di_MInst_sisi_rnd_ll_s1<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1.L, $src2.L):<<1:rnd"),
+      [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1.L, $src2.L):rnd"; selects IntID(IntRegs, IntRegs)
+// into a DoubleRegs result.
+class di_MInst_sisi_rnd_ll<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1.L, $src2.L):rnd"),
+      [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst += opc($src1, $src2)"; selects
+// IntID(DoubleRegs dst2, IntRegs src1, IntRegs src2) into DoubleRegs.
+// "$dst2 = $dst" is forwarded as the last MInst_acc argument (presumably the
+// constraint tying $dst2 to $dst).
+class di_MInst_disisi_acc<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst += ", opc, "($src1, $src2)"),
+      [(set DoubleRegs:$dst,
+            (IntID DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+      "$dst2 = $dst">;
+
+// Asm "$dst -= opc($src1, $src2)"; selects
+// IntID(DoubleRegs dst2, IntRegs src1, IntRegs src2) into DoubleRegs.
+// "$dst2 = $dst" is forwarded as the last MInst_acc argument (presumably the
+// constraint tying $dst2 to $dst).
+class di_MInst_disisi_nac<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst -= ", opc, "($src1, $src2)"),
+      [(set DoubleRegs:$dst,
+            (IntID DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+      "$dst2 = $dst">;
+
+// Asm "$dst += opc($src1, $src2):sat"; selects
+// IntID(DoubleRegs dst2, IntRegs src1, IntRegs src2) into DoubleRegs.
+// "$dst2 = $dst" is forwarded as the last MInst_acc argument (presumably the
+// constraint tying $dst2 to $dst).
+class di_MInst_disisi_acc_sat<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst += ", opc, "($src1, $src2):sat"),
+      [(set DoubleRegs:$dst,
+            (IntID DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+      "$dst2 = $dst">;
+
+// Asm "$dst -= opc($src1, $src2):sat"; selects
+// IntID(DoubleRegs dst2, IntRegs src1, IntRegs src2) into DoubleRegs.
+// "$dst2 = $dst" is forwarded as the last MInst_acc argument (presumably the
+// constraint tying $dst2 to $dst).
+class di_MInst_disisi_nac_sat<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst -= ", opc, "($src1, $src2):sat"),
+      [(set DoubleRegs:$dst,
+            (IntID DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+      "$dst2 = $dst">;
+
+// Asm "$dst += opc($src1, $src2*):sat"; selects
+// IntID(DoubleRegs dst2, IntRegs src1, IntRegs src2) into DoubleRegs.
+// "$dst2 = $dst" is forwarded as the last MInst_acc argument (presumably the
+// constraint tying $dst2 to $dst).
+class di_MInst_disisi_acc_sat_conj<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst += ", opc, "($src1, $src2*):sat"),
+      [(set DoubleRegs:$dst,
+            (IntID DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+      "$dst2 = $dst">;
+
+// Asm "$dst -= opc($src1, $src2*):sat"; selects
+// IntID(DoubleRegs dst2, IntRegs src1, IntRegs src2) into DoubleRegs.
+// "$dst2 = $dst" is forwarded as the last MInst_acc argument (presumably the
+// constraint tying $dst2 to $dst).
+class di_MInst_disisi_nac_sat_conj<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst -= ", opc, "($src1, $src2*):sat"),
+      [(set DoubleRegs:$dst,
+            (IntID DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+      "$dst2 = $dst">;
+
+// Asm "$dst -= opc($src1, $src2):<<1:sat"; selects
+// IntID(DoubleRegs dst2, IntRegs src1, IntRegs src2) into DoubleRegs.
+// "$dst2 = $dst" is forwarded as the last MInst_acc argument (presumably the
+// constraint tying $dst2 to $dst).
+class di_MInst_disisi_nac_s1_sat<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst -= ", opc, "($src1, $src2):<<1:sat"),
+      [(set DoubleRegs:$dst,
+            (IntID DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+      "$dst2 = $dst">;
+
+// Asm "$dst += opc($src1, $src2*):<<1:sat"; selects
+// IntID(DoubleRegs dst2, IntRegs src1, IntRegs src2) into DoubleRegs.
+// "$dst2 = $dst" is forwarded as the last MInst_acc argument (presumably the
+// constraint tying $dst2 to $dst).
+class di_MInst_disisi_acc_s1_sat_conj<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst += ", opc, "($src1, $src2*):<<1:sat"),
+      [(set DoubleRegs:$dst,
+            (IntID DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+      "$dst2 = $dst">;
+
+// Asm "$dst -= opc($src1, $src2*):<<1:sat"; selects
+// IntID(DoubleRegs dst2, IntRegs src1, IntRegs src2) into DoubleRegs.
+// "$dst2 = $dst" is forwarded as the last MInst_acc argument (presumably the
+// constraint tying $dst2 to $dst).
+class di_MInst_disisi_nac_s1_sat_conj<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+      (ins DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst -= ", opc, "($src1, $src2*):<<1:sat"),
+      [(set DoubleRegs:$dst,
+            (IntID DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+      "$dst2 = $dst">;
+
+// Asm "$dst = opc(#$src1, #$src2)"; selects IntID(imm, imm) into a DoubleRegs
+// result. Both inputs are declared as s8Imm operands.
+class di_MInst_s8s8<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst), (ins s8Imm:$src1, s8Imm:$src2),
+      !strconcat("$dst = ", opc, "(#$src1, #$src2)"),
+      [(set DoubleRegs:$dst, (IntID imm:$src1, imm:$src2))]>;
+
+// Asm "$dst = opc($src1, $src2)"; selects IntID(IntRegs, IntRegs) into an
+// IntRegs result.
+class si_MInst_sisi<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2)"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1.H, $src2.H)"; selects IntID(IntRegs, IntRegs) into a
+// DoubleRegs result.
+class di_MInst_sisi_hh<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1.H, $src2.H)"),
+      [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1.H, $src2.H):<<1"; selects IntID(IntRegs, IntRegs)
+// into a DoubleRegs result.
+class di_MInst_sisi_hh_s1<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1.H, $src2.H):<<1"),
+      [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1.L, $src2.H)"; selects IntID(IntRegs, IntRegs) into a
+// DoubleRegs result.
+class di_MInst_sisi_lh<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1.L, $src2.H)"),
+      [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1.L, $src2.H):<<1"; selects IntID(IntRegs, IntRegs)
+// into a DoubleRegs result.
+class di_MInst_sisi_lh_s1<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1.L, $src2.H):<<1"),
+      [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1.H, $src2.L)"; selects IntID(IntRegs, IntRegs) into a
+// DoubleRegs result.
+class di_MInst_sisi_hl<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1.H, $src2.L)"),
+      [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1.H, $src2.L):<<1"; selects IntID(IntRegs, IntRegs)
+// into a DoubleRegs result.
+class di_MInst_sisi_hl_s1<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1.H, $src2.L):<<1"),
+      [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1.L, $src2.L)"; selects IntID(IntRegs, IntRegs) into a
+// DoubleRegs result.
+class di_MInst_sisi_ll<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1.L, $src2.L)"),
+      [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1.L, $src2.L):<<1"; selects IntID(IntRegs, IntRegs)
+// into a DoubleRegs result.
+class di_MInst_sisi_ll_s1<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1.L, $src2.L):<<1"),
+      [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+
+// Asm "$dst = opc($src1.H, $src2.H)"; selects IntID(IntRegs, IntRegs) into an
+// IntRegs result.
+class si_MInst_sisi_hh<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1.H, $src2.H)"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1.H, $src2.H):<<1"; selects IntID(IntRegs, IntRegs)
+// into an IntRegs result.
+class si_MInst_sisi_hh_s1<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1.H, $src2.H):<<1"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1.L, $src2.H)"; selects IntID(IntRegs, IntRegs) into an
+// IntRegs result.
+class si_MInst_sisi_lh<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1.L, $src2.H)"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1.L, $src2.H):<<1"; selects IntID(IntRegs, IntRegs)
+// into an IntRegs result.
+class si_MInst_sisi_lh_s1<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1.L, $src2.H):<<1"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1.H, $src2.L)"; selects IntID(IntRegs, IntRegs) into an
+// IntRegs result.
+class si_MInst_sisi_hl<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1.H, $src2.L)"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1.H, $src2.L):<<1"; selects IntID(IntRegs, IntRegs)
+// into an IntRegs result.
+class si_MInst_sisi_hl_s1<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1.H, $src2.L):<<1"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1.L, $src2.L)"; selects IntID(IntRegs, IntRegs) into an
+// IntRegs result.
+class si_MInst_sisi_ll<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1.L, $src2.L)"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1.L, $src2.L):<<1"; selects IntID(IntRegs, IntRegs)
+// into an IntRegs result.
+class si_MInst_sisi_ll_s1<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1.L, $src2.L):<<1"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1, $src2)"; selects IntID(IntRegs, IntRegs) into an
+// IntRegs result. The definition is the same as si_MInst_sisi.
+class si_MInst_sisi_up<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2)"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1, $src2)"; selects IntID(DoubleRegs, DoubleRegs) into
+// a DoubleRegs result.
+class di_MInst_didi<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2)"),
+      [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+// Asm "$dst = opc($src1, $src2*)"; selects IntID(DoubleRegs, DoubleRegs) into
+// a DoubleRegs result.
+class di_MInst_didi_conj<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2*)"),
+      [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+// Asm "$dst = opc($src1, $src2*):<<1:sat"; selects IntID(IntRegs, IntRegs)
+// into a DoubleRegs result.
+class di_MInst_sisi_s1_sat_conj<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2*):<<1:sat"),
+      [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1, $src2):<<1:rnd:sat"; selects
+// IntID(DoubleRegs, DoubleRegs) into a DoubleRegs result.
+class di_MInst_didi_s1_rnd_sat<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2):<<1:rnd:sat"),
+      [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+// Asm "$dst = opc($src1, $src2):sat"; selects IntID(DoubleRegs, DoubleRegs)
+// into a DoubleRegs result.
+class di_MInst_didi_sat<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2):sat"),
+      [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+// Asm "$dst = opc($src1, $src2):rnd:sat"; selects
+// IntID(DoubleRegs, DoubleRegs) into a DoubleRegs result.
+class di_MInst_didi_rnd_sat<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2):rnd:sat"),
+      [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+// Asm "$dst = opc($src1, $src2):sat"; selects IntID(IntRegs, IntRegs) into an
+// IntRegs result. Derives from SInst although it sits among the MInst classes.
+class si_SInst_sisi_sat<string opc, Intrinsic IntID>
+  : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2):sat"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1, $src2):<<1:rnd:sat"; selects IntID(IntRegs, IntRegs)
+// into an IntRegs result.
+class si_MInst_sisi_s1_rnd_sat<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2):<<1:rnd:sat"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1, $src2.L):<<1:rnd:sat"; selects
+// IntID(IntRegs, IntRegs) into an IntRegs result.
+class si_MInst_sisi_l_s1_rnd_sat<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2.L):<<1:rnd:sat"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1, $src2.H):<<1:rnd:sat"; selects
+// IntID(IntRegs, IntRegs) into an IntRegs result.
+class si_MInst_sisi_h_s1_rnd_sat<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2.H):<<1:rnd:sat"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1, $src2*):rnd:sat"; selects IntID(IntRegs, IntRegs)
+// into an IntRegs result.
+class si_MInst_sisi_rnd_sat_conj<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2*):rnd:sat"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1, $src2*):<<1:rnd:sat"; selects
+// IntID(IntRegs, IntRegs) into an IntRegs result.
+class si_MInst_sisi_s1_rnd_sat_conj<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2*):<<1:rnd:sat"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1, $src2):rnd:sat"; selects IntID(IntRegs, IntRegs)
+// into an IntRegs result.
+class si_MInst_sisi_rnd_sat<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2):rnd:sat"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst = opc($src1, $src2):rnd"; selects IntID(IntRegs, IntRegs) into an
+// IntRegs result.
+class si_MInst_sisi_rnd<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+      !strconcat("$dst = ", opc, "($src1, $src2):rnd"),
+      [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Asm "$dst ^= opc($src2, $src3)"; selects IntID(dst2, src2, src3), all
+// IntRegs, into IntRegs. "$dst2 = $dst" is forwarded as the last MInst_acc
+// argument (presumably the constraint tying $dst2 to $dst).
+class si_MInst_sisisi_xacc<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$dst2, IntRegs:$src2, IntRegs:$src3),
+      !strconcat("$dst ^= ", opc, "($src2, $src3)"),
+      [(set IntRegs:$dst,
+            (IntID IntRegs:$dst2, IntRegs:$src2, IntRegs:$src3))],
+      "$dst2 = $dst">;
+
+// Asm "$dst += opc($src2, $src3)"; selects IntID(dst2, src2, src3), all
+// IntRegs, into IntRegs. "$dst2 = $dst" is forwarded as the last MInst_acc
+// argument (presumably the constraint tying $dst2 to $dst).
+class si_MInst_sisisi_acc<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$dst2, IntRegs:$src2, IntRegs:$src3),
+      !strconcat("$dst += ", opc, "($src2, $src3)"),
+      [(set IntRegs:$dst,
+            (IntID IntRegs:$dst2, IntRegs:$src2, IntRegs:$src3))],
+      "$dst2 = $dst">;
+
+// Asm "$dst -= opc($src2, $src3)"; selects IntID(dst2, src2, src3), all
+// IntRegs, into IntRegs. "$dst2 = $dst" is forwarded as the last MInst_acc
+// argument (presumably the constraint tying $dst2 to $dst).
+class si_MInst_sisisi_nac<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$dst2, IntRegs:$src2, IntRegs:$src3),
+      !strconcat("$dst -= ", opc, "($src2, $src3)"),
+      [(set IntRegs:$dst,
+            (IntID IntRegs:$dst2, IntRegs:$src2, IntRegs:$src3))],
+      "$dst2 = $dst">;
+
+// Asm "$dst += opc($src2, #$src3)"; selects
+// IntID(IntRegs dst2, IntRegs src2, imm) into IntRegs. $src3 is an s8Imm
+// operand. "$dst2 = $dst" is forwarded as the last MInst_acc argument
+// (presumably the constraint tying $dst2 to $dst).
+class si_MInst_sisis8_acc<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$dst2, IntRegs:$src2, s8Imm:$src3),
+      !strconcat("$dst += ", opc, "($src2, #$src3)"),
+      [(set IntRegs:$dst,
+            (IntID IntRegs:$dst2, IntRegs:$src2, imm:$src3))],
+      "$dst2 = $dst">;
+
+// Asm "$dst -= opc($src2, #$src3)"; selects
+// IntID(IntRegs dst2, IntRegs src2, imm) into IntRegs. $src3 is an s8Imm
+// operand. "$dst2 = $dst" is forwarded as the last MInst_acc argument
+// (presumably the constraint tying $dst2 to $dst).
+class si_MInst_sisis8_nac<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$dst2, IntRegs:$src2, s8Imm:$src3),
+      !strconcat("$dst -= ", opc, "($src2, #$src3)"),
+      [(set IntRegs:$dst,
+            (IntID IntRegs:$dst2, IntRegs:$src2, imm:$src3))],
+      "$dst2 = $dst">;
+
+// Asm "$dst = opc($src1, #$src2, #$src3)"; selects
+// IntID(IntRegs dst2, IntRegs src1, imm, imm) into IntRegs. $src2 is a u4Imm
+// and $src3 a u5Imm operand. $dst2 is an input that is not printed;
+// "$dst2 = $dst" is forwarded to MInst_acc (presumably a tie constraint).
+class si_MInst_sisiu4u5<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+      (ins IntRegs:$dst2, IntRegs:$src1, u4Imm:$src2, u5Imm:$src3),
+      !strconcat("$dst = ", opc, "($src1, #$src2, #$src3)"),
+      [(set IntRegs:$dst,
+            (IntID IntRegs:$dst2, IntRegs:$src1, imm:$src2, imm:$src3))],
+      "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src2, #$src3)" with a u8Imm $src3; $dst2 is tied
+// to $dst.
+class si_MInst_sisiu8_acc<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src2, u8Imm:$src3),
+              !strconcat("$dst += ", opc, "($src2, #$src3)"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src2, imm:$src3))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src2, #$src3)" with a u8Imm $src3; $dst2 is tied
+// to $dst.
+class si_MInst_sisiu8_nac<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src2, u8Imm:$src3),
+              !strconcat("$dst -= ", opc, "($src2, #$src3)"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src2, imm:$src3))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1.H, $src2.H)"; input $dst2 is tied to $dst.
+class si_MInst_sisisi_acc_hh<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1.H, $src2.H)"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1.L, $src2.H):sat"; input $dst2 is tied to $dst.
+class si_MInst_sisisi_acc_sat_lh<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1.L, $src2.H):sat"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1.L, $src2.H):<<1:sat"; $dst2 is tied to $dst.
+class si_MInst_sisisi_acc_sat_lh_s1<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1.L, $src2.H):<<1:sat"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1.H, $src2.H):sat"; input $dst2 is tied to $dst.
+class si_MInst_sisisi_acc_sat_hh<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1.H, $src2.H):sat"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1.H, $src2.H):<<1:sat"; $dst2 is tied to $dst.
+class si_MInst_sisisi_acc_sat_hh_s1<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1.H, $src2.H):<<1:sat"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1.H, $src2.H):<<1"; input $dst2 is tied to $dst.
+class si_MInst_sisisi_acc_hh_s1<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1.H, $src2.H):<<1"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.H, $src2.H)"; input $dst2 is tied to $dst.
+class si_MInst_sisisi_nac_hh<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.H, $src2.H)"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.H, $src2.H):<<1:sat"; $dst2 is tied to $dst.
+class si_MInst_sisisi_nac_sat_hh_s1<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.H, $src2.H):<<1:sat"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.H, $src2.H):sat"; input $dst2 is tied to $dst.
+class si_MInst_sisisi_nac_sat_hh<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.H, $src2.H):sat"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.H, $src2.L):<<1:sat"; $dst2 is tied to $dst.
+class si_MInst_sisisi_nac_sat_hl_s1<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.H, $src2.L):<<1:sat"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.H, $src2.L):sat"; input $dst2 is tied to $dst.
+class si_MInst_sisisi_nac_sat_hl<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.H, $src2.L):sat"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.L, $src2.H):<<1:sat"; $dst2 is tied to $dst.
+class si_MInst_sisisi_nac_sat_lh_s1<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.L, $src2.H):<<1:sat"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.L, $src2.H):sat"; input $dst2 is tied to $dst.
+class si_MInst_sisisi_nac_sat_lh<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.L, $src2.H):sat"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.L, $src2.L):<<1:sat"; $dst2 is tied to $dst.
+class si_MInst_sisisi_nac_sat_ll_s1<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.L, $src2.L):<<1:sat"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.L, $src2.L):sat"; input $dst2 is tied to $dst.
+class si_MInst_sisisi_nac_sat_ll<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.L, $src2.L):sat"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.H, $src2.H):<<1"; input $dst2 is tied to $dst.
+class si_MInst_sisisi_nac_hh_s1<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.H, $src2.H):<<1"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1.H, $src2.L)"; input $dst2 is tied to $dst.
+class si_MInst_sisisi_acc_hl<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1.H, $src2.L)"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1.H, $src2.L):<<1"; input $dst2 is tied to $dst.
+class si_MInst_sisisi_acc_hl_s1<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1.H, $src2.L):<<1"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.H, $src2.L)"; input $dst2 is tied to $dst.
+class si_MInst_sisisi_nac_hl<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.H, $src2.L)"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.H, $src2.L):<<1"; input $dst2 is tied to $dst.
+class si_MInst_sisisi_nac_hl_s1<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.H, $src2.L):<<1"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1.L, $src2.H)"; input $dst2 is tied to $dst.
+class si_MInst_sisisi_acc_lh<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1.L, $src2.H)"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1.L, $src2.H):<<1"; input $dst2 is tied to $dst.
+class si_MInst_sisisi_acc_lh_s1<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1.L, $src2.H):<<1"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.L, $src2.H)"; input $dst2 is tied to $dst.
+class si_MInst_sisisi_nac_lh<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.L, $src2.H)"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.L, $src2.H):<<1"; input $dst2 is tied to $dst.
+class si_MInst_sisisi_nac_lh_s1<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.L, $src2.H):<<1"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1.L, $src2.L)"; input $dst2 is tied to $dst.
+class si_MInst_sisisi_acc_ll<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1.L, $src2.L)"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1.L, $src2.L):<<1"; input $dst2 is tied to $dst.
+class si_MInst_sisisi_acc_ll_s1<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1.L, $src2.L):<<1"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1.L, $src2.L):<<1:sat"; $dst2 is tied to $dst.
+class si_MInst_sisisi_acc_sat_ll_s1<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1.L, $src2.L):<<1:sat"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1.H, $src2.L):<<1:sat"; $dst2 is tied to $dst.
+class si_MInst_sisisi_acc_sat_hl_s1<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1.H, $src2.L):<<1:sat"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1.L, $src2.L):sat"; input $dst2 is tied to $dst.
+class si_MInst_sisisi_acc_sat_ll<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1.L, $src2.L):sat"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1.H, $src2.L):sat"; input $dst2 is tied to $dst.
+class si_MInst_sisisi_acc_sat_hl<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1.H, $src2.L):sat"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.L, $src2.L)"; input $dst2 is tied to $dst.
+class si_MInst_sisisi_nac_ll<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.L, $src2.L)"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.L, $src2.L):<<1"; input $dst2 is tied to $dst.
+class si_MInst_sisisi_nac_ll_s1<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.L, $src2.L):<<1"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.H, $src2.H):sat"; $dst2 is tied to $dst.
+// Body is identical to si_MInst_sisisi_nac_sat_hh; only the name differs.
+class si_MInst_sisisi_nac_hh_sat<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.H, $src2.H):sat"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.H, $src2.H):<<1:sat"; $dst2 is tied to $dst.
+// Body is identical to si_MInst_sisisi_nac_sat_hh_s1; only the name differs.
+class si_MInst_sisisi_nac_hh_s1_sat<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.H, $src2.H):<<1:sat"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.H, $src2.L):sat"; $dst2 is tied to $dst.
+// Body is identical to si_MInst_sisisi_nac_sat_hl; only the name differs.
+class si_MInst_sisisi_nac_hl_sat<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.H, $src2.L):sat"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.H, $src2.L):<<1:sat"; $dst2 is tied to $dst.
+// Body is identical to si_MInst_sisisi_nac_sat_hl_s1; only the name differs.
+class si_MInst_sisisi_nac_hl_s1_sat<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.H, $src2.L):<<1:sat"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.L, $src2.H):sat"; $dst2 is tied to $dst.
+// Body is identical to si_MInst_sisisi_nac_sat_lh; only the name differs.
+class si_MInst_sisisi_nac_lh_sat<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.L, $src2.H):sat"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.L, $src2.H):<<1:sat"; $dst2 is tied to $dst.
+// Body is identical to si_MInst_sisisi_nac_sat_lh_s1; only the name differs.
+class si_MInst_sisisi_nac_lh_s1_sat<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.L, $src2.H):<<1:sat"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.L, $src2.L):sat"; $dst2 is tied to $dst.
+// Body is identical to si_MInst_sisisi_nac_sat_ll; only the name differs.
+class si_MInst_sisisi_nac_ll_sat<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.L, $src2.L):sat"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.L, $src2.L):<<1:sat"; $dst2 is tied to $dst.
+// Body is identical to si_MInst_sisisi_nac_sat_ll_s1; only the name differs.
+class si_MInst_sisisi_nac_ll_s1_sat<string opc, Intrinsic IntID>
+  : MInst_acc<(outs IntRegs:$dst),
+              (ins IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.L, $src2.L):<<1:sat"),
+              [(set IntRegs:$dst,
+                    (IntID IntRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// ALU32_rr form printing "$dst = <opc>($src1, $src2)"; DoubleRegs result from
+// two IntRegs inputs.
+class di_ALU32_sisi<string opc, Intrinsic IntID>
+  : ALU32_rr<(outs DoubleRegs:$dst),
+             (ins IntRegs:$src1, IntRegs:$src2),
+             !strconcat("$dst = ", opc, "($src1, $src2)"),
+             [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1, $src2)"; DoubleRegs result, two IntRegs inputs.
+class di_MInst_sisi<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1, $src2)"),
+          [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1, $src2):sat"; DoubleRegs result, two IntRegs
+// inputs.
+class di_MInst_sisi_sat<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1, $src2):sat"),
+          [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1, $src2*):sat" (note the '*' after $src2);
+// DoubleRegs result, two IntRegs inputs.
+class di_MInst_sisi_sat_conj<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1, $src2*):sat"),
+          [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1, $src2):<<1:sat"; DoubleRegs result, two IntRegs
+// inputs.
+class di_MInst_sisi_s1_sat<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1, $src2):<<1:sat"),
+          [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1, $src2):<<1:sat"; DoubleRegs result, two
+// DoubleRegs inputs.
+class di_MInst_didi_s1_sat<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst),
+          (ins DoubleRegs:$src1, DoubleRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1, $src2):<<1:sat"),
+          [(set DoubleRegs:$dst,
+                (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1, $src2):<<1:rnd:sat"; IntRegs result, two
+// DoubleRegs inputs.
+class si_MInst_didi_s1_rnd_sat<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins DoubleRegs:$src1, DoubleRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1, $src2):<<1:rnd:sat"),
+          [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1, $src2):rnd:sat"; IntRegs result, two DoubleRegs
+// inputs.
+class si_MInst_didi_rnd_sat<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins DoubleRegs:$src1, DoubleRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1, $src2):rnd:sat"),
+          [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1.H, $src2.H):sat"; IntRegs result and inputs.
+class si_MInst_sisi_sat_hh<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1.H, $src2.H):sat"),
+          [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1.H, $src2.H):<<1:sat"; IntRegs result and inputs.
+class si_MInst_sisi_sat_hh_s1<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1.H, $src2.H):<<1:sat"),
+          [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1.H, $src2.L):sat"; IntRegs result and inputs.
+class si_MInst_sisi_sat_hl<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1.H, $src2.L):sat"),
+          [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1.H, $src2.L):<<1:sat"; IntRegs result and inputs.
+class si_MInst_sisi_sat_hl_s1<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1.H, $src2.L):<<1:sat"),
+          [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1.L, $src2.H):sat"; IntRegs result and inputs.
+class si_MInst_sisi_sat_lh<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1.L, $src2.H):sat"),
+          [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1.L, $src2.H):<<1:sat"; IntRegs result and inputs.
+class si_MInst_sisi_sat_lh_s1<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1.L, $src2.H):<<1:sat"),
+          [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1.L, $src2.L):sat"; IntRegs result and inputs.
+class si_MInst_sisi_sat_ll<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1.L, $src2.L):sat"),
+          [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1.L, $src2.L):<<1:sat"; IntRegs result and inputs.
+class si_MInst_sisi_sat_ll_s1<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1.L, $src2.L):<<1:sat"),
+          [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1.H, $src2.H):rnd:sat"; IntRegs result and inputs.
+class si_MInst_sisi_sat_rnd_hh<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1.H, $src2.H):rnd:sat"),
+          [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1.H, $src2.H):rnd"; IntRegs result and inputs.
+class si_MInst_sisi_rnd_hh<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1.H, $src2.H):rnd"),
+          [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1.H, $src2.H):<<1:rnd"; IntRegs result and inputs.
+class si_MInst_sisi_rnd_hh_s1<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1.H, $src2.H):<<1:rnd"),
+          [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1.H, $src2.H):<<1:rnd:sat"; IntRegs result and
+// inputs.
+class si_MInst_sisi_sat_rnd_hh_s1<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1.H, $src2.H):<<1:rnd:sat"),
+          [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1.H, $src2.L):rnd"; IntRegs result and inputs.
+class si_MInst_sisi_rnd_hl<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1.H, $src2.L):rnd"),
+          [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1.H, $src2.L):<<1:rnd"; IntRegs result and inputs.
+class si_MInst_sisi_rnd_hl_s1<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1.H, $src2.L):<<1:rnd"),
+          [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1.H, $src2.L):rnd:sat"; IntRegs result and inputs.
+class si_MInst_sisi_sat_rnd_hl<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1.H, $src2.L):rnd:sat"),
+          [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1.H, $src2.L):<<1:rnd:sat"; IntRegs result and
+// inputs.
+class si_MInst_sisi_sat_rnd_hl_s1<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1.H, $src2.L):<<1:rnd:sat"),
+          [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1.L, $src2.H):rnd"; IntRegs result and inputs.
+class si_MInst_sisi_rnd_lh<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1.L, $src2.H):rnd"),
+          [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1.L, $src2.H):rnd:sat"; IntRegs result and inputs.
+class si_MInst_sisi_sat_rnd_lh<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1.L, $src2.H):rnd:sat"),
+          [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1.L, $src2.H):<<1:rnd:sat"; IntRegs result and
+// inputs.
+class si_MInst_sisi_sat_rnd_lh_s1<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1.L, $src2.H):<<1:rnd:sat"),
+          [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1.L, $src2.H):<<1:rnd"; IntRegs result and inputs.
+class si_MInst_sisi_rnd_lh_s1<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1.L, $src2.H):<<1:rnd"),
+          [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1.L, $src2.L):rnd:sat"; IntRegs result and inputs.
+class si_MInst_sisi_sat_rnd_ll<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1.L, $src2.L):rnd:sat"),
+          [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1.L, $src2.L):<<1:rnd:sat"; IntRegs result and
+// inputs.
+class si_MInst_sisi_sat_rnd_ll_s1<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1.L, $src2.L):<<1:rnd:sat"),
+          [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1.L, $src2.L):rnd"; IntRegs result and inputs.
+class si_MInst_sisi_rnd_ll<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1.L, $src2.L):rnd"),
+          [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1.L, $src2.L):<<1:rnd"; IntRegs result and inputs.
+class si_MInst_sisi_rnd_ll_s1<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins IntRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1.L, $src2.L):<<1:rnd"),
+          [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst += <opc>($src1, $src2):sat" on DoubleRegs; $dst2 is tied to
+// $dst.
+class di_MInst_dididi_acc_sat<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+              (ins DoubleRegs:$dst2, DoubleRegs:$src1, DoubleRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1, $src2):sat"),
+              [(set DoubleRegs:$dst,
+                    (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+                           DoubleRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1, $src2):rnd:sat" on DoubleRegs; $dst2 is tied
+// to $dst.
+class di_MInst_dididi_acc_rnd_sat<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+              (ins DoubleRegs:$dst2, DoubleRegs:$src1, DoubleRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1, $src2):rnd:sat"),
+              [(set DoubleRegs:$dst,
+                    (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+                           DoubleRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1, $src2):<<1:sat" on DoubleRegs; $dst2 is tied
+// to $dst.
+class di_MInst_dididi_acc_s1_sat<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+              (ins DoubleRegs:$dst2, DoubleRegs:$src1, DoubleRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1, $src2):<<1:sat"),
+              [(set DoubleRegs:$dst,
+                    (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+                           DoubleRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1, $src2):<<1:rnd:sat" on DoubleRegs; $dst2 is
+// tied to $dst.
+class di_MInst_dididi_acc_s1_rnd_sat<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+              (ins DoubleRegs:$dst2, DoubleRegs:$src1, DoubleRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1, $src2):<<1:rnd:sat"),
+              [(set DoubleRegs:$dst,
+                    (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+                           DoubleRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1, $src2)" on DoubleRegs; $dst2 is tied to $dst.
+class di_MInst_dididi_acc<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+              (ins DoubleRegs:$dst2, DoubleRegs:$src1, DoubleRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1, $src2)"),
+              [(set DoubleRegs:$dst,
+                    (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+                           DoubleRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1, $src2*)" (note the '*' after $src2) on
+// DoubleRegs; $dst2 is tied to $dst.
+class di_MInst_dididi_acc_conj<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+              (ins DoubleRegs:$dst2, DoubleRegs:$src1, DoubleRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1, $src2*)"),
+              [(set DoubleRegs:$dst,
+                    (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+                           DoubleRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1.H, $src2.H)"; DoubleRegs $dst2 is tied to $dst.
+class di_MInst_disisi_acc_hh<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+              (ins DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1.H, $src2.H)"),
+              [(set DoubleRegs:$dst,
+                    (IntID DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1.H, $src2.L)"; DoubleRegs $dst2 is tied to $dst.
+class di_MInst_disisi_acc_hl<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+              (ins DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1.H, $src2.L)"),
+              [(set DoubleRegs:$dst,
+                    (IntID DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1.L, $src2.H)"; DoubleRegs $dst2 is tied to $dst.
+class di_MInst_disisi_acc_lh<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+              (ins DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1.L, $src2.H)"),
+              [(set DoubleRegs:$dst,
+                    (IntID DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1.L, $src2.L)"; DoubleRegs $dst2 is tied to $dst.
+class di_MInst_disisi_acc_ll<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+              (ins DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1.L, $src2.L)"),
+              [(set DoubleRegs:$dst,
+                    (IntID DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1.H, $src2.H):<<1"; DoubleRegs $dst2 is tied to
+// $dst.
+class di_MInst_disisi_acc_hh_s1<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+              (ins DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1.H, $src2.H):<<1"),
+              [(set DoubleRegs:$dst,
+                    (IntID DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1.H, $src2.L):<<1"; DoubleRegs $dst2 is tied to
+// $dst.
+class di_MInst_disisi_acc_hl_s1<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+              (ins DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1.H, $src2.L):<<1"),
+              [(set DoubleRegs:$dst,
+                    (IntID DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1.L, $src2.H):<<1"; DoubleRegs $dst2 is tied to
+// $dst.
+class di_MInst_disisi_acc_lh_s1<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+              (ins DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1.L, $src2.H):<<1"),
+              [(set DoubleRegs:$dst,
+                    (IntID DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1.L, $src2.L):<<1"; DoubleRegs $dst2 is tied to
+// $dst.
+class di_MInst_disisi_acc_ll_s1<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+              (ins DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1.L, $src2.L):<<1"),
+              [(set DoubleRegs:$dst,
+                    (IntID DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.H, $src2.H)"; DoubleRegs $dst2 is tied to $dst.
+class di_MInst_disisi_nac_hh<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+              (ins DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.H, $src2.H)"),
+              [(set DoubleRegs:$dst,
+                    (IntID DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.H, $src2.L)"; DoubleRegs $dst2 is tied to $dst.
+class di_MInst_disisi_nac_hl<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+              (ins DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.H, $src2.L)"),
+              [(set DoubleRegs:$dst,
+                    (IntID DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.L, $src2.H)"; DoubleRegs $dst2 is tied to $dst.
+class di_MInst_disisi_nac_lh<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+              (ins DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.L, $src2.H)"),
+              [(set DoubleRegs:$dst,
+                    (IntID DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.L, $src2.L)"; DoubleRegs $dst2 is tied to $dst.
+class di_MInst_disisi_nac_ll<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+              (ins DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.L, $src2.L)"),
+              [(set DoubleRegs:$dst,
+                    (IntID DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.H, $src2.H):<<1"; DoubleRegs $dst2 is tied to
+// $dst.
+class di_MInst_disisi_nac_hh_s1<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+              (ins DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.H, $src2.H):<<1"),
+              [(set DoubleRegs:$dst,
+                    (IntID DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.H, $src2.L):<<1"; DoubleRegs $dst2 is tied to
+// $dst.
+class di_MInst_disisi_nac_hl_s1<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+              (ins DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.H, $src2.L):<<1"),
+              [(set DoubleRegs:$dst,
+                    (IntID DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.L, $src2.H):<<1"; DoubleRegs $dst2 is tied to
+// $dst.
+class di_MInst_disisi_nac_lh_s1<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+              (ins DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.L, $src2.H):<<1"),
+              [(set DoubleRegs:$dst,
+                    (IntID DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst -= <opc>($src1.L, $src2.L):<<1"; DoubleRegs $dst2 is tied to
+// $dst.
+class di_MInst_disisi_nac_ll_s1<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+              (ins DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst -= ", opc, "($src1.L, $src2.L):<<1"),
+              [(set DoubleRegs:$dst,
+                    (IntID DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst += <opc>($src1, $src2):<<1:sat" with IntRegs sources;
+// DoubleRegs $dst2 is tied to $dst.
+class di_MInst_disisi_acc_s1_sat<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+              (ins DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1, $src2):<<1:sat"),
+              [(set DoubleRegs:$dst,
+                    (IntID DoubleRegs:$dst2, IntRegs:$src1, IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst = <opc>($src1, $src2):<<1:sat"; DoubleRegs result from a
+// DoubleRegs $src1 and an IntRegs $src2.
+class di_MInst_disi_s1_sat<string opc, Intrinsic IntID>
+  : MInst<(outs DoubleRegs:$dst),
+          (ins DoubleRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1, $src2):<<1:sat"),
+          [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst += <opc>($src1, $src2):<<1:sat" with a DoubleRegs $src1 and an
+// IntRegs $src2; DoubleRegs $dst2 is tied to $dst.
+class di_MInst_didisi_acc_s1_sat<string opc, Intrinsic IntID>
+  : MInst_acc<(outs DoubleRegs:$dst),
+              (ins DoubleRegs:$dst2, DoubleRegs:$src1, IntRegs:$src2),
+              !strconcat("$dst += ", opc, "($src1, $src2):<<1:sat"),
+              [(set DoubleRegs:$dst,
+                    (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+                           IntRegs:$src2))],
+              "$dst2 = $dst">;
+
+// Prints "$dst = <opc>($src1, $src2):<<1:rnd:sat"; IntRegs result from a
+// DoubleRegs $src1 and an IntRegs $src2.
+class si_MInst_disi_s1_rnd_sat<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins DoubleRegs:$src1, IntRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1, $src2):<<1:rnd:sat"),
+          [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
+
+// Prints "$dst = <opc>($src1, $src2)"; IntRegs result, two DoubleRegs inputs.
+class si_MInst_didi<string opc, Intrinsic IntID>
+  : MInst<(outs IntRegs:$dst),
+          (ins DoubleRegs:$src1, DoubleRegs:$src2),
+          !strconcat("$dst = ", opc, "($src1, $src2)"),
+          [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+
+/********************************************************************
+* ALU32/ALU *
+*********************************************************************/
+
+// ALU32 / ALU / Add.
+// Register-register and register-immediate forms share the "add" mnemonic.
+def Hexagon_A2_add
+  : si_ALU32_sisi<"add", int_hexagon_A2_add>;
+def Hexagon_A2_addi
+  : si_ALU32_sis16<"add", int_hexagon_A2_addi>;
+
+// ALU32 / ALU / Logical operations.
+// and / or each have a si_ALU32_sisi form and a si_ALU32_sis10 form;
+// not is single-source (si_ALU32_si); xor only has the si_ALU32_sisi form.
+def Hexagon_A2_and
+  : si_ALU32_sisi<"and", int_hexagon_A2_and>;
+def Hexagon_A2_andir
+  : si_ALU32_sis10<"and", int_hexagon_A2_andir>;
+def Hexagon_A2_not
+  : si_ALU32_si<"not", int_hexagon_A2_not>;
+def Hexagon_A2_or
+  : si_ALU32_sisi<"or", int_hexagon_A2_or>;
+def Hexagon_A2_orir
+  : si_ALU32_sis10<"or", int_hexagon_A2_orir>;
+def Hexagon_A2_xor
+  : si_ALU32_sisi<"xor", int_hexagon_A2_xor>;
+
+// ALU32 / ALU / Negate.
+// Single-source form bound to int_hexagon_A2_neg with mnemonic "neg".
+def Hexagon_A2_neg
+  : si_ALU32_si<"neg", int_hexagon_A2_neg>;
+
+// ALU32 / ALU / Subtract.
+// "sub" in si_ALU32_sisi form, plus the subri variant using si_ALU32_s10si.
+def Hexagon_A2_sub
+  : si_ALU32_sisi<"sub", int_hexagon_A2_sub>;
+def Hexagon_A2_subri
+  : si_ALU32_s10si<"sub", int_hexagon_A2_subri>;
+
+// ALU32 / ALU / Transfer Immediate.
+def Hexagon_A2_tfril:
+ si_lo_ALU32_siu16 <"", int_hexagon_A2_tfril>;
+def Hexagon_A2_tfrih:
+ si_hi_ALU32_siu16 <"", int_hexagon_A2_tfrih>;
+def Hexagon_A2_tfrsi:
+ si_ALU32_s16 <"", int_hexagon_A2_tfrsi>;
+def Hexagon_A2_tfrpi:
+ di_ALU32_s8 <"", int_hexagon_A2_tfrpi>;
+
+// ALU32 / ALU / Transfer Register.
+def Hexagon_A2_tfr:
+ si_ALU32_si_tfr <"", int_hexagon_A2_tfr>;
+
+/********************************************************************
+* ALU32/PERM *
+*********************************************************************/
+
+// ALU32 / PERM / Combine.
+def Hexagon_A2_combinew:
+ di_ALU32_sisi <"combine", int_hexagon_A2_combinew>;
+def Hexagon_A2_combine_hh:
+ si_MInst_sisi_hh <"combine", int_hexagon_A2_combine_hh>;
+def Hexagon_A2_combine_lh:
+ si_MInst_sisi_lh <"combine", int_hexagon_A2_combine_lh>;
+def Hexagon_A2_combine_hl:
+ si_MInst_sisi_hl <"combine", int_hexagon_A2_combine_hl>;
+def Hexagon_A2_combine_ll:
+ si_MInst_sisi_ll <"combine", int_hexagon_A2_combine_ll>;
+def Hexagon_A2_combineii:
+ di_MInst_s8s8 <"combine", int_hexagon_A2_combineii>;
+
+// ALU32 / PERM / Mux.
+def Hexagon_C2_mux:
+ si_ALU32_qisisi <"mux", int_hexagon_C2_mux>;
+def Hexagon_C2_muxri:
+ si_ALU32_qis8si <"mux", int_hexagon_C2_muxri>;
+def Hexagon_C2_muxir:
+ si_ALU32_qisis8 <"mux", int_hexagon_C2_muxir>;
+def Hexagon_C2_muxii:
+ si_ALU32_qis8s8 <"mux", int_hexagon_C2_muxii>;
+
+// ALU32 / PERM / Shift halfword.
+def Hexagon_A2_aslh:
+ si_ALU32_si <"aslh", int_hexagon_A2_aslh>;
+def Hexagon_A2_asrh:
+ si_ALU32_si <"asrh", int_hexagon_A2_asrh>;
+def SI_to_SXTHI_asrh: // NOTE(review): unlike the sibling defs this name has no Hexagon_<group>_ prefix; confirm it is intentional.
+ si_ALU32_si <"asrh", int_hexagon_SI_to_SXTHI_asrh>; // Uses the same "asrh" mnemonic and class as Hexagon_A2_asrh.
+
+// ALU32 / PERM / Sign/zero extend.
+def Hexagon_A2_sxth:
+ si_ALU32_si <"sxth", int_hexagon_A2_sxth>;
+def Hexagon_A2_sxtb:
+ si_ALU32_si <"sxtb", int_hexagon_A2_sxtb>;
+def Hexagon_A2_zxth:
+ si_ALU32_si <"zxth", int_hexagon_A2_zxth>;
+def Hexagon_A2_zxtb:
+ si_ALU32_si <"zxtb", int_hexagon_A2_zxtb>;
+
+/********************************************************************
+* ALU32/PRED *
+*********************************************************************/
+
+// ALU32 / PRED / Compare.
+def Hexagon_C2_cmpeq:
+ qi_ALU32_sisi <"cmp.eq", int_hexagon_C2_cmpeq>;
+def Hexagon_C2_cmpeqi:
+ qi_ALU32_sis10 <"cmp.eq", int_hexagon_C2_cmpeqi>;
+def Hexagon_C2_cmpgei:
+ qi_ALU32_sis8 <"cmp.ge", int_hexagon_C2_cmpgei>;
+def Hexagon_C2_cmpgeui:
+ qi_ALU32_siu8 <"cmp.geu", int_hexagon_C2_cmpgeui>;
+def Hexagon_C2_cmpgt:
+ qi_ALU32_sisi <"cmp.gt", int_hexagon_C2_cmpgt>;
+def Hexagon_C2_cmpgti:
+ qi_ALU32_sis10 <"cmp.gt", int_hexagon_C2_cmpgti>;
+def Hexagon_C2_cmpgtu:
+ qi_ALU32_sisi <"cmp.gtu", int_hexagon_C2_cmpgtu>;
+def Hexagon_C2_cmpgtui:
+ qi_ALU32_siu9 <"cmp.gtu", int_hexagon_C2_cmpgtui>;
+def Hexagon_C2_cmplt:
+ qi_ALU32_sisi <"cmp.lt", int_hexagon_C2_cmplt>;
+def Hexagon_C2_cmpltu:
+ qi_ALU32_sisi <"cmp.ltu", int_hexagon_C2_cmpltu>;
+
+/********************************************************************
+* ALU32/VH *
+*********************************************************************/
+
+// ALU32 / VH / Vector add halfwords.
+// Rd32=vadd[u]h(Rs32,Rt32)[:sat]
+def Hexagon_A2_svaddh:
+ si_ALU32_sisi <"vaddh", int_hexagon_A2_svaddh>;
+def Hexagon_A2_svaddhs:
+ si_ALU32_sisi_sat <"vaddh", int_hexagon_A2_svaddhs>;
+def Hexagon_A2_svadduhs:
+ si_ALU32_sisi_sat <"vadduh", int_hexagon_A2_svadduhs>;
+
+// ALU32 / VH / Vector average halfwords.
+def Hexagon_A2_svavgh:
+ si_ALU32_sisi <"vavgh", int_hexagon_A2_svavgh>;
+def Hexagon_A2_svavghs:
+ si_ALU32_sisi_rnd <"vavgh", int_hexagon_A2_svavghs>;
+def Hexagon_A2_svnavgh:
+ si_ALU32_sisi <"vnavgh", int_hexagon_A2_svnavgh>;
+
+// ALU32 / VH / Vector subtract halfwords.
+def Hexagon_A2_svsubh:
+ si_ALU32_sisi <"vsubh", int_hexagon_A2_svsubh>;
+def Hexagon_A2_svsubhs:
+ si_ALU32_sisi_sat <"vsubh", int_hexagon_A2_svsubhs>;
+def Hexagon_A2_svsubuhs:
+ si_ALU32_sisi_sat <"vsubuh", int_hexagon_A2_svsubuhs>;
+
+/********************************************************************
+* ALU64/ALU *
+*********************************************************************/
+
+// ALU64 / ALU / Add.
+def Hexagon_A2_addp:
+ di_ALU64_didi <"add", int_hexagon_A2_addp>;
+def Hexagon_A2_addsat:
+ si_ALU64_sisi_sat <"add", int_hexagon_A2_addsat>;
+
+// ALU64 / ALU / Add halfword.
+// NOTE: the *_l16_hl intrinsics below are named "hl", but the instruction
+// form is "lh" - so do NOT change the *_l16_lh classes they inherit from.
+def Hexagon_A2_addh_l16_hl:
+ si_ALU64_sisi_l16_lh <"add", int_hexagon_A2_addh_l16_hl>;
+def Hexagon_A2_addh_l16_ll:
+ si_ALU64_sisi_l16_ll <"add", int_hexagon_A2_addh_l16_ll>;
+
+def Hexagon_A2_addh_l16_sat_hl:
+ si_ALU64_sisi_l16_sat_lh <"add", int_hexagon_A2_addh_l16_sat_hl>;
+def Hexagon_A2_addh_l16_sat_ll:
+ si_ALU64_sisi_l16_sat_ll <"add", int_hexagon_A2_addh_l16_sat_ll>;
+
+def Hexagon_A2_addh_h16_hh:
+ si_ALU64_sisi_h16_hh <"add", int_hexagon_A2_addh_h16_hh>;
+def Hexagon_A2_addh_h16_hl:
+ si_ALU64_sisi_h16_hl <"add", int_hexagon_A2_addh_h16_hl>;
+def Hexagon_A2_addh_h16_lh:
+ si_ALU64_sisi_h16_lh <"add", int_hexagon_A2_addh_h16_lh>;
+def Hexagon_A2_addh_h16_ll:
+ si_ALU64_sisi_h16_ll <"add", int_hexagon_A2_addh_h16_ll>;
+
+def Hexagon_A2_addh_h16_sat_hh:
+ si_ALU64_sisi_h16_sat_hh <"add", int_hexagon_A2_addh_h16_sat_hh>;
+def Hexagon_A2_addh_h16_sat_hl:
+ si_ALU64_sisi_h16_sat_hl <"add", int_hexagon_A2_addh_h16_sat_hl>;
+def Hexagon_A2_addh_h16_sat_lh:
+ si_ALU64_sisi_h16_sat_lh <"add", int_hexagon_A2_addh_h16_sat_lh>;
+def Hexagon_A2_addh_h16_sat_ll:
+ si_ALU64_sisi_h16_sat_ll <"add", int_hexagon_A2_addh_h16_sat_ll>;
+
+// ALU64 / ALU / Compare.
+def Hexagon_C2_cmpeqp:
+ qi_ALU64_didi <"cmp.eq", int_hexagon_C2_cmpeqp>;
+def Hexagon_C2_cmpgtp:
+ qi_ALU64_didi <"cmp.gt", int_hexagon_C2_cmpgtp>;
+def Hexagon_C2_cmpgtup:
+ qi_ALU64_didi <"cmp.gtu", int_hexagon_C2_cmpgtup>;
+
+// ALU64 / ALU / Logical operations.
+def Hexagon_A2_andp:
+ di_ALU64_didi <"and", int_hexagon_A2_andp>;
+def Hexagon_A2_orp:
+ di_ALU64_didi <"or", int_hexagon_A2_orp>;
+def Hexagon_A2_xorp:
+ di_ALU64_didi <"xor", int_hexagon_A2_xorp>;
+
+// ALU64 / ALU / Maximum.
+def Hexagon_A2_max:
+ si_ALU64_sisi <"max", int_hexagon_A2_max>;
+def Hexagon_A2_maxu:
+ si_ALU64_sisi <"maxu", int_hexagon_A2_maxu>;
+
+// ALU64 / ALU / Minimum.
+def Hexagon_A2_min:
+ si_ALU64_sisi <"min", int_hexagon_A2_min>;
+def Hexagon_A2_minu:
+ si_ALU64_sisi <"minu", int_hexagon_A2_minu>;
+
+// ALU64 / ALU / Subtract.
+def Hexagon_A2_subp:
+ di_ALU64_didi <"sub", int_hexagon_A2_subp>;
+def Hexagon_A2_subsat:
+ si_ALU64_sisi_sat <"sub", int_hexagon_A2_subsat>;
+
+// ALU64 / ALU / Subtract halfword.
+// NOTE: the *_l16_hl intrinsics below are named "hl", but the instruction
+// form is "lh" - so do NOT change the *_l16_lh classes they inherit from.
+def Hexagon_A2_subh_l16_hl:
+ si_ALU64_sisi_l16_lh <"sub", int_hexagon_A2_subh_l16_hl>;
+def Hexagon_A2_subh_l16_ll:
+ si_ALU64_sisi_l16_ll <"sub", int_hexagon_A2_subh_l16_ll>;
+
+def Hexagon_A2_subh_l16_sat_hl:
+ si_ALU64_sisi_l16_sat_lh <"sub", int_hexagon_A2_subh_l16_sat_hl>;
+def Hexagon_A2_subh_l16_sat_ll:
+ si_ALU64_sisi_l16_sat_ll <"sub", int_hexagon_A2_subh_l16_sat_ll>;
+
+def Hexagon_A2_subh_h16_hh:
+ si_ALU64_sisi_h16_hh <"sub", int_hexagon_A2_subh_h16_hh>;
+def Hexagon_A2_subh_h16_hl:
+ si_ALU64_sisi_h16_hl <"sub", int_hexagon_A2_subh_h16_hl>;
+def Hexagon_A2_subh_h16_lh:
+ si_ALU64_sisi_h16_lh <"sub", int_hexagon_A2_subh_h16_lh>;
+def Hexagon_A2_subh_h16_ll:
+ si_ALU64_sisi_h16_ll <"sub", int_hexagon_A2_subh_h16_ll>;
+
+def Hexagon_A2_subh_h16_sat_hh:
+ si_ALU64_sisi_h16_sat_hh <"sub", int_hexagon_A2_subh_h16_sat_hh>;
+def Hexagon_A2_subh_h16_sat_hl:
+ si_ALU64_sisi_h16_sat_hl <"sub", int_hexagon_A2_subh_h16_sat_hl>;
+def Hexagon_A2_subh_h16_sat_lh:
+ si_ALU64_sisi_h16_sat_lh <"sub", int_hexagon_A2_subh_h16_sat_lh>;
+def Hexagon_A2_subh_h16_sat_ll:
+ si_ALU64_sisi_h16_sat_ll <"sub", int_hexagon_A2_subh_h16_sat_ll>;
+
+// ALU64 / ALU / Transfer register.
+def Hexagon_A2_tfrp:
+ di_ALU64_di <"", int_hexagon_A2_tfrp>;
+
+/********************************************************************
+* ALU64/BIT *
+*********************************************************************/
+
+// ALU64 / BIT / Masked parity.
+def Hexagon_S2_parityp:
+ si_ALU64_didi <"parity", int_hexagon_S2_parityp>;
+
+/********************************************************************
+* ALU64/PERM *
+*********************************************************************/
+
+// ALU64 / PERM / Vector pack high and low halfwords.
+def Hexagon_S2_packhl:
+ di_ALU64_sisi <"packhl", int_hexagon_S2_packhl>;
+
+/********************************************************************
+* ALU64/VB *
+*********************************************************************/
+
+// ALU64 / VB / Vector add unsigned bytes.
+def Hexagon_A2_vaddub:
+ di_ALU64_didi <"vaddub", int_hexagon_A2_vaddub>;
+def Hexagon_A2_vaddubs:
+ di_ALU64_didi_sat <"vaddub", int_hexagon_A2_vaddubs>;
+
+// ALU64 / VB / Vector average unsigned bytes.
+def Hexagon_A2_vavgub:
+ di_ALU64_didi <"vavgub", int_hexagon_A2_vavgub>;
+def Hexagon_A2_vavgubr:
+ di_ALU64_didi_rnd <"vavgub", int_hexagon_A2_vavgubr>;
+
+// ALU64 / VB / Vector compare unsigned bytes.
+def Hexagon_A2_vcmpbeq:
+ qi_ALU64_didi <"vcmpb.eq", int_hexagon_A2_vcmpbeq>;
+def Hexagon_A2_vcmpbgtu:
+ qi_ALU64_didi <"vcmpb.gtu",int_hexagon_A2_vcmpbgtu>;
+
+// ALU64 / VB / Vector maximum/minimum unsigned bytes.
+def Hexagon_A2_vmaxub:
+ di_ALU64_didi <"vmaxub", int_hexagon_A2_vmaxub>;
+def Hexagon_A2_vminub:
+ di_ALU64_didi <"vminub", int_hexagon_A2_vminub>;
+
+// ALU64 / VB / Vector subtract unsigned bytes.
+def Hexagon_A2_vsubub:
+ di_ALU64_didi <"vsubub", int_hexagon_A2_vsubub>;
+def Hexagon_A2_vsububs:
+ di_ALU64_didi_sat <"vsubub", int_hexagon_A2_vsububs>;
+
+// ALU64 / VB / Vector mux.
+def Hexagon_C2_vmux:
+ di_ALU64_qididi <"vmux", int_hexagon_C2_vmux>;
+
+
+/********************************************************************
+* ALU64/VH *
+*********************************************************************/
+
+// ALU64 / VH / Vector add halfwords.
+// Rdd64=vadd[u]h(Rss64,Rtt64)[:sat]
+def Hexagon_A2_vaddh:
+ di_ALU64_didi <"vaddh", int_hexagon_A2_vaddh>;
+def Hexagon_A2_vaddhs:
+ di_ALU64_didi_sat <"vaddh", int_hexagon_A2_vaddhs>;
+def Hexagon_A2_vadduhs:
+ di_ALU64_didi_sat <"vadduh", int_hexagon_A2_vadduhs>;
+
+// ALU64 / VH / Vector average halfwords.
+// Rdd64=v[n]avg[u]h(Rss64,Rtt64)[:rnd/:crnd][:sat]
+def Hexagon_A2_vavgh:
+ di_ALU64_didi <"vavgh", int_hexagon_A2_vavgh>;
+def Hexagon_A2_vavghcr:
+ di_ALU64_didi_crnd <"vavgh", int_hexagon_A2_vavghcr>;
+def Hexagon_A2_vavghr:
+ di_ALU64_didi_rnd <"vavgh", int_hexagon_A2_vavghr>;
+def Hexagon_A2_vavguh:
+ di_ALU64_didi <"vavguh", int_hexagon_A2_vavguh>;
+def Hexagon_A2_vavguhr:
+ di_ALU64_didi_rnd <"vavguh", int_hexagon_A2_vavguhr>;
+def Hexagon_A2_vnavgh:
+ di_ALU64_didi <"vnavgh", int_hexagon_A2_vnavgh>;
+def Hexagon_A2_vnavghcr:
+ di_ALU64_didi_crnd_sat <"vnavgh", int_hexagon_A2_vnavghcr>;
+def Hexagon_A2_vnavghr:
+ di_ALU64_didi_rnd_sat <"vnavgh", int_hexagon_A2_vnavghr>;
+
+// ALU64 / VH / Vector compare halfwords.
+def Hexagon_A2_vcmpheq:
+ qi_ALU64_didi <"vcmph.eq", int_hexagon_A2_vcmpheq>;
+def Hexagon_A2_vcmphgt:
+ qi_ALU64_didi <"vcmph.gt", int_hexagon_A2_vcmphgt>;
+def Hexagon_A2_vcmphgtu:
+ qi_ALU64_didi <"vcmph.gtu",int_hexagon_A2_vcmphgtu>;
+
+// ALU64 / VH / Vector maximum halfwords.
+def Hexagon_A2_vmaxh:
+ di_ALU64_didi <"vmaxh", int_hexagon_A2_vmaxh>;
+def Hexagon_A2_vmaxuh:
+ di_ALU64_didi <"vmaxuh", int_hexagon_A2_vmaxuh>;
+
+// ALU64 / VH / Vector minimum halfwords.
+def Hexagon_A2_vminh:
+ di_ALU64_didi <"vminh", int_hexagon_A2_vminh>;
+def Hexagon_A2_vminuh:
+ di_ALU64_didi <"vminuh", int_hexagon_A2_vminuh>;
+
+// ALU64 / VH / Vector subtract halfwords.
+def Hexagon_A2_vsubh:
+ di_ALU64_didi <"vsubh", int_hexagon_A2_vsubh>;
+def Hexagon_A2_vsubhs:
+ di_ALU64_didi_sat <"vsubh", int_hexagon_A2_vsubhs>;
+def Hexagon_A2_vsubuhs:
+ di_ALU64_didi_sat <"vsubuh", int_hexagon_A2_vsubuhs>;
+
+
+/********************************************************************
+* ALU64/VW *
+*********************************************************************/
+
+// ALU64 / VW / Vector add words.
+// Rdd32=vaddw(Rss32,Rtt32)[:sat]
+def Hexagon_A2_vaddw:
+ di_ALU64_didi <"vaddw", int_hexagon_A2_vaddw>;
+def Hexagon_A2_vaddws:
+ di_ALU64_didi_sat <"vaddw", int_hexagon_A2_vaddws>;
+
+// ALU64 / VW / Vector average words.
+def Hexagon_A2_vavguw:
+ di_ALU64_didi <"vavguw", int_hexagon_A2_vavguw>;
+def Hexagon_A2_vavguwr:
+ di_ALU64_didi_rnd <"vavguw", int_hexagon_A2_vavguwr>;
+def Hexagon_A2_vavgw:
+ di_ALU64_didi <"vavgw", int_hexagon_A2_vavgw>;
+def Hexagon_A2_vavgwcr:
+ di_ALU64_didi_crnd <"vavgw", int_hexagon_A2_vavgwcr>;
+def Hexagon_A2_vavgwr:
+ di_ALU64_didi_rnd <"vavgw", int_hexagon_A2_vavgwr>;
+def Hexagon_A2_vnavgw:
+ di_ALU64_didi <"vnavgw", int_hexagon_A2_vnavgw>;
+def Hexagon_A2_vnavgwcr:
+ di_ALU64_didi_crnd_sat <"vnavgw", int_hexagon_A2_vnavgwcr>;
+def Hexagon_A2_vnavgwr:
+ di_ALU64_didi_rnd_sat <"vnavgw", int_hexagon_A2_vnavgwr>;
+
+// ALU64 / VW / Vector compare words.
+def Hexagon_A2_vcmpweq:
+ qi_ALU64_didi <"vcmpw.eq", int_hexagon_A2_vcmpweq>;
+def Hexagon_A2_vcmpwgt:
+ qi_ALU64_didi <"vcmpw.gt", int_hexagon_A2_vcmpwgt>;
+def Hexagon_A2_vcmpwgtu:
+ qi_ALU64_didi <"vcmpw.gtu",int_hexagon_A2_vcmpwgtu>;
+
+// ALU64 / VW / Vector maximum words.
+def Hexagon_A2_vmaxw:
+ di_ALU64_didi <"vmaxw", int_hexagon_A2_vmaxw>;
+def Hexagon_A2_vmaxuw:
+ di_ALU64_didi <"vmaxuw", int_hexagon_A2_vmaxuw>;
+
+// ALU64 / VW / Vector minimum words.
+def Hexagon_A2_vminw:
+ di_ALU64_didi <"vminw", int_hexagon_A2_vminw>;
+def Hexagon_A2_vminuw:
+ di_ALU64_didi <"vminuw", int_hexagon_A2_vminuw>;
+
+// ALU64 / VW / Vector subtract words.
+def Hexagon_A2_vsubw:
+ di_ALU64_didi <"vsubw", int_hexagon_A2_vsubw>;
+def Hexagon_A2_vsubws:
+ di_ALU64_didi_sat <"vsubw", int_hexagon_A2_vsubws>;
+
+
+/********************************************************************
+* CR *
+*********************************************************************/
+
+// CR / Logical reductions on predicates.
+def Hexagon_C2_all8:
+ qi_SInst_qi <"all8", int_hexagon_C2_all8>;
+def Hexagon_C2_any8:
+ qi_SInst_qi <"any8", int_hexagon_C2_any8>;
+
+// CR / Logical operations on predicates.
+def Hexagon_C2_pxfer_map:
+ qi_SInst_qi_pxfer <"", int_hexagon_C2_pxfer_map>;
+def Hexagon_C2_and:
+ qi_SInst_qiqi <"and", int_hexagon_C2_and>;
+def Hexagon_C2_andn:
+ qi_SInst_qiqi_neg <"and", int_hexagon_C2_andn>;
+def Hexagon_C2_not:
+ qi_SInst_qi <"not", int_hexagon_C2_not>;
+def Hexagon_C2_or:
+ qi_SInst_qiqi <"or", int_hexagon_C2_or>;
+def Hexagon_C2_orn:
+ qi_SInst_qiqi_neg <"or", int_hexagon_C2_orn>;
+def Hexagon_C2_xor:
+ qi_SInst_qiqi <"xor", int_hexagon_C2_xor>;
+
+
+/********************************************************************
+* MTYPE/ALU *
+*********************************************************************/
+
+// MTYPE / ALU / Add and accumulate.
+def Hexagon_M2_acci:
+ si_MInst_sisisi_acc <"add", int_hexagon_M2_acci>;
+def Hexagon_M2_accii:
+ si_MInst_sisis8_acc <"add", int_hexagon_M2_accii>;
+def Hexagon_M2_nacci:
+ si_MInst_sisisi_nac <"add", int_hexagon_M2_nacci>;
+def Hexagon_M2_naccii:
+ si_MInst_sisis8_nac <"add", int_hexagon_M2_naccii>;
+
+// MTYPE / ALU / Subtract and accumulate.
+def Hexagon_M2_subacc:
+ si_MInst_sisisi_acc <"sub", int_hexagon_M2_subacc>;
+
+// MTYPE / ALU / Vector absolute difference.
+def Hexagon_M2_vabsdiffh:
+ di_MInst_didi <"vabsdiffh",int_hexagon_M2_vabsdiffh>;
+def Hexagon_M2_vabsdiffw:
+ di_MInst_didi <"vabsdiffw",int_hexagon_M2_vabsdiffw>;
+
+// MTYPE / ALU / XOR and xor with destination.
+def Hexagon_M2_xor_xacc:
+ si_MInst_sisisi_xacc <"xor", int_hexagon_M2_xor_xacc>;
+
+
+/********************************************************************
+* MTYPE/COMPLEX *
+*********************************************************************/
+
+// MTYPE / COMPLEX / Complex multiply.
+// Rdd[-+]=cmpy(Rs, Rt)[:<<1]:sat
+def Hexagon_M2_cmpys_s1:
+ di_MInst_sisi_s1_sat <"cmpy", int_hexagon_M2_cmpys_s1>;
+def Hexagon_M2_cmpys_s0:
+ di_MInst_sisi_sat <"cmpy", int_hexagon_M2_cmpys_s0>;
+def Hexagon_M2_cmpysc_s1:
+ di_MInst_sisi_s1_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s1>;
+def Hexagon_M2_cmpysc_s0:
+ di_MInst_sisi_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s0>;
+
+def Hexagon_M2_cmacs_s1:
+ di_MInst_disisi_acc_s1_sat <"cmpy", int_hexagon_M2_cmacs_s1>;
+def Hexagon_M2_cmacs_s0:
+ di_MInst_disisi_acc_sat <"cmpy", int_hexagon_M2_cmacs_s0>;
+def Hexagon_M2_cmacsc_s1:
+ di_MInst_disisi_acc_s1_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s1>;
+def Hexagon_M2_cmacsc_s0:
+ di_MInst_disisi_acc_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s0>;
+
+def Hexagon_M2_cnacs_s1:
+ di_MInst_disisi_nac_s1_sat <"cmpy", int_hexagon_M2_cnacs_s1>;
+def Hexagon_M2_cnacs_s0:
+ di_MInst_disisi_nac_sat <"cmpy", int_hexagon_M2_cnacs_s0>;
+def Hexagon_M2_cnacsc_s1:
+ di_MInst_disisi_nac_s1_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s1>;
+def Hexagon_M2_cnacsc_s0:
+ di_MInst_disisi_nac_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s0>;
+
+// MTYPE / COMPLEX / Complex multiply real or imaginary.
+def Hexagon_M2_cmpyr_s0:
+ di_MInst_sisi <"cmpyr", int_hexagon_M2_cmpyr_s0>;
+def Hexagon_M2_cmacr_s0:
+ di_MInst_disisi_acc <"cmpyr", int_hexagon_M2_cmacr_s0>;
+
+def Hexagon_M2_cmpyi_s0:
+ di_MInst_sisi <"cmpyi", int_hexagon_M2_cmpyi_s0>;
+def Hexagon_M2_cmaci_s0:
+ di_MInst_disisi_acc <"cmpyi", int_hexagon_M2_cmaci_s0>;
+
+// MTYPE / COMPLEX / Complex multiply with round and pack.
+// Rxx32+=cmpy(Rs32,[*]Rt32)[:<<1]:rnd:sat
+def Hexagon_M2_cmpyrs_s0:
+ si_MInst_sisi_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s0>;
+def Hexagon_M2_cmpyrs_s1:
+ si_MInst_sisi_s1_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s1>;
+
+def Hexagon_M2_cmpyrsc_s0:
+ si_MInst_sisi_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s0>;
+def Hexagon_M2_cmpyrsc_s1:
+ si_MInst_sisi_s1_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s1>;
+
+//MTYPE / COMPLEX / Vector complex multiply real or imaginary.
+def Hexagon_M2_vcmpy_s0_sat_i:
+ di_MInst_didi_sat <"vcmpyi", int_hexagon_M2_vcmpy_s0_sat_i>;
+def Hexagon_M2_vcmpy_s1_sat_i:
+ di_MInst_didi_s1_sat <"vcmpyi", int_hexagon_M2_vcmpy_s1_sat_i>;
+
+def Hexagon_M2_vcmpy_s0_sat_r:
+ di_MInst_didi_sat <"vcmpyr", int_hexagon_M2_vcmpy_s0_sat_r>;
+def Hexagon_M2_vcmpy_s1_sat_r:
+ di_MInst_didi_s1_sat <"vcmpyr", int_hexagon_M2_vcmpy_s1_sat_r>;
+
+def Hexagon_M2_vcmac_s0_sat_i:
+ di_MInst_dididi_acc_sat <"vcmpyi", int_hexagon_M2_vcmac_s0_sat_i>;
+def Hexagon_M2_vcmac_s0_sat_r:
+ di_MInst_dididi_acc_sat <"vcmpyr", int_hexagon_M2_vcmac_s0_sat_r>;
+
+//MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary.
+def Hexagon_M2_vrcmpyi_s0:
+ di_MInst_didi <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0>;
+def Hexagon_M2_vrcmpyr_s0:
+ di_MInst_didi <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0>;
+
+def Hexagon_M2_vrcmpyi_s0c:
+ di_MInst_didi_conj <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0c>;
+def Hexagon_M2_vrcmpyr_s0c:
+ di_MInst_didi_conj <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0c>;
+
+def Hexagon_M2_vrcmaci_s0:
+ di_MInst_dididi_acc <"vrcmpyi", int_hexagon_M2_vrcmaci_s0>;
+def Hexagon_M2_vrcmacr_s0:
+ di_MInst_dididi_acc <"vrcmpyr", int_hexagon_M2_vrcmacr_s0>;
+
+def Hexagon_M2_vrcmaci_s0c:
+ di_MInst_dididi_acc_conj <"vrcmpyi", int_hexagon_M2_vrcmaci_s0c>;
+def Hexagon_M2_vrcmacr_s0c:
+ di_MInst_dididi_acc_conj <"vrcmpyr", int_hexagon_M2_vrcmacr_s0c>;
+
+
+/********************************************************************
+* MTYPE/MPYH *
+*********************************************************************/
+
+// MTYPE / MPYH / Multiply and use lower result.
+//def Hexagon_M2_mpysmi:
+// si_MInst_sim9 <"mpyi", int_hexagon_M2_mpysmi>;
+def Hexagon_M2_mpyi:
+ si_MInst_sisi <"mpyi", int_hexagon_M2_mpyi>;
+def Hexagon_M2_mpyui:
+ si_MInst_sisi <"mpyui", int_hexagon_M2_mpyui>;
+def Hexagon_M2_macsip:
+ si_MInst_sisiu8_acc <"mpyi", int_hexagon_M2_macsip>;
+def Hexagon_M2_maci:
+ si_MInst_sisisi_acc <"mpyi", int_hexagon_M2_maci>;
+def Hexagon_M2_macsin:
+ si_MInst_sisiu8_nac <"mpyi", int_hexagon_M2_macsin>;
+
+// MTYPE / MPYH / Multiply word by half (32x16).
+//Rdd[+]=vmpywoh(Rss,Rtt)[:<<1][:rnd][:sat]
+//Rdd[+]=vmpyweh(Rss,Rtt)[:<<1][:rnd][:sat]
+def Hexagon_M2_mmpyl_rs1:
+ di_MInst_didi_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs1>;
+def Hexagon_M2_mmpyl_s1:
+ di_MInst_didi_s1_sat <"vmpyweh", int_hexagon_M2_mmpyl_s1>;
+def Hexagon_M2_mmpyl_rs0:
+ di_MInst_didi_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs0>;
+def Hexagon_M2_mmpyl_s0:
+ di_MInst_didi_sat <"vmpyweh", int_hexagon_M2_mmpyl_s0>;
+def Hexagon_M2_mmpyh_rs1:
+ di_MInst_didi_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs1>;
+def Hexagon_M2_mmpyh_s1:
+ di_MInst_didi_s1_sat <"vmpywoh", int_hexagon_M2_mmpyh_s1>;
+def Hexagon_M2_mmpyh_rs0:
+ di_MInst_didi_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs0>;
+def Hexagon_M2_mmpyh_s0:
+ di_MInst_didi_sat <"vmpywoh", int_hexagon_M2_mmpyh_s0>;
+def Hexagon_M2_mmacls_rs1:
+ di_MInst_dididi_acc_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs1>;
+def Hexagon_M2_mmacls_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpyweh", int_hexagon_M2_mmacls_s1>;
+def Hexagon_M2_mmacls_rs0:
+ di_MInst_dididi_acc_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs0>;
+def Hexagon_M2_mmacls_s0:
+ di_MInst_dididi_acc_sat <"vmpyweh", int_hexagon_M2_mmacls_s0>;
+def Hexagon_M2_mmachs_rs1:
+ di_MInst_dididi_acc_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs1>;
+def Hexagon_M2_mmachs_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpywoh", int_hexagon_M2_mmachs_s1>;
+def Hexagon_M2_mmachs_rs0:
+ di_MInst_dididi_acc_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs0>;
+def Hexagon_M2_mmachs_s0:
+ di_MInst_dididi_acc_sat <"vmpywoh", int_hexagon_M2_mmachs_s0>;
+
+// MTYPE / MPYH / Multiply word by unsigned half (32x16).
+//Rdd[+]=vmpywouh(Rss,Rtt)[:<<1][:rnd][:sat]
+//Rdd[+]=vmpyweuh(Rss,Rtt)[:<<1][:rnd][:sat]
+def Hexagon_M2_mmpyul_rs1:
+ di_MInst_didi_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs1>;
+def Hexagon_M2_mmpyul_s1:
+ di_MInst_didi_s1_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s1>;
+def Hexagon_M2_mmpyul_rs0:
+ di_MInst_didi_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs0>;
+def Hexagon_M2_mmpyul_s0:
+ di_MInst_didi_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s0>;
+def Hexagon_M2_mmpyuh_rs1:
+ di_MInst_didi_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs1>;
+def Hexagon_M2_mmpyuh_s1:
+ di_MInst_didi_s1_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s1>;
+def Hexagon_M2_mmpyuh_rs0:
+ di_MInst_didi_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs0>;
+def Hexagon_M2_mmpyuh_s0:
+ di_MInst_didi_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s0>;
+def Hexagon_M2_mmaculs_rs1:
+ di_MInst_dididi_acc_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs1>;
+def Hexagon_M2_mmaculs_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s1>;
+def Hexagon_M2_mmaculs_rs0:
+ di_MInst_dididi_acc_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs0>;
+def Hexagon_M2_mmaculs_s0:
+ di_MInst_dididi_acc_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s0>;
+def Hexagon_M2_mmacuhs_rs1:
+ di_MInst_dididi_acc_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs1>;
+def Hexagon_M2_mmacuhs_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s1>;
+def Hexagon_M2_mmacuhs_rs0:
+ di_MInst_dididi_acc_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs0>;
+def Hexagon_M2_mmacuhs_s0:
+ di_MInst_dididi_acc_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s0>;
+
+// MTYPE / MPYH / Multiply and use upper result.
+def Hexagon_M2_hmmpyh_rs1:
+ si_MInst_sisi_h_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyh_rs1>;
+def Hexagon_M2_hmmpyl_rs1:
+ si_MInst_sisi_l_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyl_rs1>;
+def Hexagon_M2_mpy_up:
+ si_MInst_sisi <"mpy", int_hexagon_M2_mpy_up>;
+def Hexagon_M2_dpmpyss_rnd_s0:
+ si_MInst_sisi_rnd <"mpy", int_hexagon_M2_dpmpyss_rnd_s0>;
+def Hexagon_M2_mpyu_up:
+ si_MInst_sisi <"mpyu", int_hexagon_M2_mpyu_up>;
+
+// MTYPE / MPYH / Multiply and use full result.
+def Hexagon_M2_dpmpyuu_s0:
+ di_MInst_sisi <"mpyu", int_hexagon_M2_dpmpyuu_s0>;
+def Hexagon_M2_dpmpyuu_acc_s0:
+ di_MInst_disisi_acc <"mpyu", int_hexagon_M2_dpmpyuu_acc_s0>;
+def Hexagon_M2_dpmpyuu_nac_s0:
+ di_MInst_disisi_nac <"mpyu", int_hexagon_M2_dpmpyuu_nac_s0>;
+def Hexagon_M2_dpmpyss_s0:
+ di_MInst_sisi <"mpy", int_hexagon_M2_dpmpyss_s0>;
+def Hexagon_M2_dpmpyss_acc_s0:
+ di_MInst_disisi_acc <"mpy", int_hexagon_M2_dpmpyss_acc_s0>;
+def Hexagon_M2_dpmpyss_nac_s0:
+ di_MInst_disisi_nac <"mpy", int_hexagon_M2_dpmpyss_nac_s0>;
+
+
+/********************************************************************
+* MTYPE/MPYS *
+*********************************************************************/
+
+// MTYPE / MPYS / Scalar 16x16 multiply signed.
+//Rd=mpy(Rs.[H|L],Rt.[H|L])[[:<<0|:<<1]|
+// [:<<0[:rnd|:sat|:rnd:sat]|:<<1[:rnd|:sat|:rnd:sat]]]
+def Hexagon_M2_mpy_hh_s0:
+ si_MInst_sisi_hh <"mpy", int_hexagon_M2_mpy_hh_s0>;
+def Hexagon_M2_mpy_hh_s1:
+ si_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpy_hh_s1>;
+def Hexagon_M2_mpy_rnd_hh_s1:
+ si_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_rnd_hh_s1>;
+def Hexagon_M2_mpy_sat_rnd_hh_s1:
+ si_MInst_sisi_sat_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s1>;
+def Hexagon_M2_mpy_sat_hh_s1:
+ si_MInst_sisi_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_hh_s1>;
+def Hexagon_M2_mpy_rnd_hh_s0:
+ si_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpy_rnd_hh_s0>;
+def Hexagon_M2_mpy_sat_rnd_hh_s0:
+ si_MInst_sisi_sat_rnd_hh <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s0>;
+def Hexagon_M2_mpy_sat_hh_s0:
+ si_MInst_sisi_sat_hh <"mpy", int_hexagon_M2_mpy_sat_hh_s0>;
+
+def Hexagon_M2_mpy_hl_s0:
+ si_MInst_sisi_hl <"mpy", int_hexagon_M2_mpy_hl_s0>;
+def Hexagon_M2_mpy_hl_s1:
+ si_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpy_hl_s1>;
+def Hexagon_M2_mpy_rnd_hl_s1:
+ si_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_rnd_hl_s1>;
+def Hexagon_M2_mpy_sat_rnd_hl_s1:
+ si_MInst_sisi_sat_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s1>;
+def Hexagon_M2_mpy_sat_hl_s1:
+ si_MInst_sisi_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_hl_s1>;
+def Hexagon_M2_mpy_rnd_hl_s0:
+ si_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpy_rnd_hl_s0>;
+def Hexagon_M2_mpy_sat_rnd_hl_s0:
+ si_MInst_sisi_sat_rnd_hl <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s0>;
+def Hexagon_M2_mpy_sat_hl_s0:
+ si_MInst_sisi_sat_hl <"mpy", int_hexagon_M2_mpy_sat_hl_s0>;
+
+def Hexagon_M2_mpy_lh_s0:
+ si_MInst_sisi_lh <"mpy", int_hexagon_M2_mpy_lh_s0>;
+def Hexagon_M2_mpy_lh_s1:
+ si_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpy_lh_s1>;
+def Hexagon_M2_mpy_rnd_lh_s1:
+ si_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_rnd_lh_s1>;
+def Hexagon_M2_mpy_sat_rnd_lh_s1:
+ si_MInst_sisi_sat_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s1>;
+def Hexagon_M2_mpy_sat_lh_s1:
+ si_MInst_sisi_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_lh_s1>;
+def Hexagon_M2_mpy_rnd_lh_s0:
+ si_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpy_rnd_lh_s0>;
+def Hexagon_M2_mpy_sat_rnd_lh_s0:
+ si_MInst_sisi_sat_rnd_lh <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s0>;
+def Hexagon_M2_mpy_sat_lh_s0:
+ si_MInst_sisi_sat_lh <"mpy", int_hexagon_M2_mpy_sat_lh_s0>;
+
+def Hexagon_M2_mpy_ll_s0:
+ si_MInst_sisi_ll <"mpy", int_hexagon_M2_mpy_ll_s0>;
+def Hexagon_M2_mpy_ll_s1:
+ si_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpy_ll_s1>;
+def Hexagon_M2_mpy_rnd_ll_s1:
+ si_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_rnd_ll_s1>;
+def Hexagon_M2_mpy_sat_rnd_ll_s1:
+ si_MInst_sisi_sat_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s1>;
+def Hexagon_M2_mpy_sat_ll_s1:
+ si_MInst_sisi_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_ll_s1>;
+def Hexagon_M2_mpy_rnd_ll_s0:
+ si_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpy_rnd_ll_s0>;
+def Hexagon_M2_mpy_sat_rnd_ll_s0:
+ si_MInst_sisi_sat_rnd_ll <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s0>;
+def Hexagon_M2_mpy_sat_ll_s0:
+ si_MInst_sisi_sat_ll <"mpy", int_hexagon_M2_mpy_sat_ll_s0>;
+
+//Rdd=mpy(Rs.[H|L],Rt.[H|L])[[:<<0|:<<1]|[:<<0:rnd|:<<1:rnd]]
+def Hexagon_M2_mpyd_hh_s0:
+ di_MInst_sisi_hh <"mpy", int_hexagon_M2_mpyd_hh_s0>;
+def Hexagon_M2_mpyd_hh_s1:
+ di_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpyd_hh_s1>;
+def Hexagon_M2_mpyd_rnd_hh_s1:
+ di_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hh_s1>;
+def Hexagon_M2_mpyd_rnd_hh_s0:
+ di_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpyd_rnd_hh_s0>;
+
+def Hexagon_M2_mpyd_hl_s0:
+ di_MInst_sisi_hl <"mpy", int_hexagon_M2_mpyd_hl_s0>;
+def Hexagon_M2_mpyd_hl_s1:
+ di_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpyd_hl_s1>;
+def Hexagon_M2_mpyd_rnd_hl_s1:
+ di_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hl_s1>;
+def Hexagon_M2_mpyd_rnd_hl_s0:
+ di_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpyd_rnd_hl_s0>;
+
+def Hexagon_M2_mpyd_lh_s0:
+ di_MInst_sisi_lh <"mpy", int_hexagon_M2_mpyd_lh_s0>;
+def Hexagon_M2_mpyd_lh_s1:
+ di_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpyd_lh_s1>;
+def Hexagon_M2_mpyd_rnd_lh_s1:
+ di_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_lh_s1>;
+def Hexagon_M2_mpyd_rnd_lh_s0:
+ di_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpyd_rnd_lh_s0>;
+
+def Hexagon_M2_mpyd_ll_s0:
+ di_MInst_sisi_ll <"mpy", int_hexagon_M2_mpyd_ll_s0>;
+def Hexagon_M2_mpyd_ll_s1:
+ di_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpyd_ll_s1>;
+def Hexagon_M2_mpyd_rnd_ll_s1:
+ di_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpyd_rnd_ll_s1>;
+def Hexagon_M2_mpyd_rnd_ll_s0:
+ di_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpyd_rnd_ll_s0>;
+
+//Rx+=mpy(Rs.[H|L],Rt.[H|L])[[:<<0|:<<1]|[:<<0:sat|:<<1:sat]]
+def Hexagon_M2_mpy_acc_hh_s0:
+ si_MInst_sisisi_acc_hh <"mpy", int_hexagon_M2_mpy_acc_hh_s0>;
+def Hexagon_M2_mpy_acc_hh_s1:
+ si_MInst_sisisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_hh_s1>;
+def Hexagon_M2_mpy_acc_sat_hh_s1:
+ si_MInst_sisisi_acc_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s1>;
+def Hexagon_M2_mpy_acc_sat_hh_s0:
+ si_MInst_sisisi_acc_sat_hh <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s0>;
+
+def Hexagon_M2_mpy_acc_hl_s0:
+ si_MInst_sisisi_acc_hl <"mpy", int_hexagon_M2_mpy_acc_hl_s0>;
+def Hexagon_M2_mpy_acc_hl_s1:
+ si_MInst_sisisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_hl_s1>;
+def Hexagon_M2_mpy_acc_sat_hl_s1:
+ si_MInst_sisisi_acc_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s1>;
+def Hexagon_M2_mpy_acc_sat_hl_s0:
+ si_MInst_sisisi_acc_sat_hl <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s0>;
+
+def Hexagon_M2_mpy_acc_lh_s0:
+ si_MInst_sisisi_acc_lh <"mpy", int_hexagon_M2_mpy_acc_lh_s0>;
+def Hexagon_M2_mpy_acc_lh_s1:
+ si_MInst_sisisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_lh_s1>;
+def Hexagon_M2_mpy_acc_sat_lh_s1:
+ si_MInst_sisisi_acc_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s1>;
+def Hexagon_M2_mpy_acc_sat_lh_s0:
+ si_MInst_sisisi_acc_sat_lh <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s0>;
+
+def Hexagon_M2_mpy_acc_ll_s0:
+ si_MInst_sisisi_acc_ll <"mpy", int_hexagon_M2_mpy_acc_ll_s0>;
+def Hexagon_M2_mpy_acc_ll_s1:
+ si_MInst_sisisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_ll_s1>;
+def Hexagon_M2_mpy_acc_sat_ll_s1:
+ si_MInst_sisisi_acc_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s1>;
+def Hexagon_M2_mpy_acc_sat_ll_s0:
+ si_MInst_sisisi_acc_sat_ll <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s0>;
+
+//Rx-=mpy(Rs.[H|L],Rt.[H|L])[[:<<0|:<<1]|[:<<0:sat|:<<1:sat]]
+def Hexagon_M2_mpy_nac_hh_s0:
+ si_MInst_sisisi_nac_hh <"mpy", int_hexagon_M2_mpy_nac_hh_s0>;
+def Hexagon_M2_mpy_nac_hh_s1:
+ si_MInst_sisisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_hh_s1>;
+def Hexagon_M2_mpy_nac_sat_hh_s1:
+ si_MInst_sisisi_nac_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s1>;
+def Hexagon_M2_mpy_nac_sat_hh_s0:
+ si_MInst_sisisi_nac_sat_hh <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s0>;
+
+def Hexagon_M2_mpy_nac_hl_s0:
+ si_MInst_sisisi_nac_hl <"mpy", int_hexagon_M2_mpy_nac_hl_s0>;
+def Hexagon_M2_mpy_nac_hl_s1:
+ si_MInst_sisisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_hl_s1>;
+def Hexagon_M2_mpy_nac_sat_hl_s1:
+ si_MInst_sisisi_nac_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s1>;
+def Hexagon_M2_mpy_nac_sat_hl_s0:
+ si_MInst_sisisi_nac_sat_hl <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s0>;
+
+def Hexagon_M2_mpy_nac_lh_s0:
+ si_MInst_sisisi_nac_lh <"mpy", int_hexagon_M2_mpy_nac_lh_s0>;
+def Hexagon_M2_mpy_nac_lh_s1:
+ si_MInst_sisisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_lh_s1>;
+def Hexagon_M2_mpy_nac_sat_lh_s1:
+ si_MInst_sisisi_nac_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s1>;
+def Hexagon_M2_mpy_nac_sat_lh_s0:
+ si_MInst_sisisi_nac_sat_lh <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s0>;
+
+def Hexagon_M2_mpy_nac_ll_s0:
+ si_MInst_sisisi_nac_ll <"mpy", int_hexagon_M2_mpy_nac_ll_s0>;
+def Hexagon_M2_mpy_nac_ll_s1:
+ si_MInst_sisisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_ll_s1>;
+def Hexagon_M2_mpy_nac_sat_ll_s1:
+ si_MInst_sisisi_nac_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s1>;
+def Hexagon_M2_mpy_nac_sat_ll_s0:
+ si_MInst_sisisi_nac_sat_ll <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s0>;
+
+//Rxx+=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyd_acc_hh_s0:
+ di_MInst_disisi_acc_hh <"mpy", int_hexagon_M2_mpyd_acc_hh_s0>;
+def Hexagon_M2_mpyd_acc_hh_s1:
+ di_MInst_disisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpyd_acc_hh_s1>;
+
+def Hexagon_M2_mpyd_acc_hl_s0:
+ di_MInst_disisi_acc_hl <"mpy", int_hexagon_M2_mpyd_acc_hl_s0>;
+def Hexagon_M2_mpyd_acc_hl_s1:
+ di_MInst_disisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpyd_acc_hl_s1>;
+
+def Hexagon_M2_mpyd_acc_lh_s0:
+ di_MInst_disisi_acc_lh <"mpy", int_hexagon_M2_mpyd_acc_lh_s0>;
+def Hexagon_M2_mpyd_acc_lh_s1:
+ di_MInst_disisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpyd_acc_lh_s1>;
+
+def Hexagon_M2_mpyd_acc_ll_s0:
+ di_MInst_disisi_acc_ll <"mpy", int_hexagon_M2_mpyd_acc_ll_s0>;
+def Hexagon_M2_mpyd_acc_ll_s1:
+ di_MInst_disisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpyd_acc_ll_s1>;
+
+//Rxx-=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyd_nac_hh_s0:
+ di_MInst_disisi_nac_hh <"mpy", int_hexagon_M2_mpyd_nac_hh_s0>;
+def Hexagon_M2_mpyd_nac_hh_s1:
+ di_MInst_disisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpyd_nac_hh_s1>;
+
+def Hexagon_M2_mpyd_nac_hl_s0:
+ di_MInst_disisi_nac_hl <"mpy", int_hexagon_M2_mpyd_nac_hl_s0>;
+def Hexagon_M2_mpyd_nac_hl_s1:
+ di_MInst_disisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpyd_nac_hl_s1>;
+
+def Hexagon_M2_mpyd_nac_lh_s0:
+ di_MInst_disisi_nac_lh <"mpy", int_hexagon_M2_mpyd_nac_lh_s0>;
+def Hexagon_M2_mpyd_nac_lh_s1:
+ di_MInst_disisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpyd_nac_lh_s1>;
+
+def Hexagon_M2_mpyd_nac_ll_s0:
+ di_MInst_disisi_nac_ll <"mpy", int_hexagon_M2_mpyd_nac_ll_s0>;
+def Hexagon_M2_mpyd_nac_ll_s1:
+ di_MInst_disisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpyd_nac_ll_s1>;
+
+// MTYPE / MPYS / Scalar 16x16 multiply unsigned.
+//Rd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyu_hh_s0:
+ si_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyu_hh_s0>;
+def Hexagon_M2_mpyu_hh_s1:
+ si_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyu_hh_s1>;
+def Hexagon_M2_mpyu_hl_s0:
+ si_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyu_hl_s0>;
+def Hexagon_M2_mpyu_hl_s1:
+ si_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyu_hl_s1>;
+def Hexagon_M2_mpyu_lh_s0:
+ si_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyu_lh_s0>;
+def Hexagon_M2_mpyu_lh_s1:
+ si_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyu_lh_s1>;
+def Hexagon_M2_mpyu_ll_s0:
+ si_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyu_ll_s0>;
+def Hexagon_M2_mpyu_ll_s1:
+ si_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyu_ll_s1>;
+
+//Rdd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyud_hh_s0:
+ di_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyud_hh_s0>;
+def Hexagon_M2_mpyud_hh_s1:
+ di_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyud_hh_s1>;
+def Hexagon_M2_mpyud_hl_s0:
+ di_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyud_hl_s0>;
+def Hexagon_M2_mpyud_hl_s1:
+ di_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyud_hl_s1>;
+def Hexagon_M2_mpyud_lh_s0:
+ di_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyud_lh_s0>;
+def Hexagon_M2_mpyud_lh_s1:
+ di_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyud_lh_s1>;
+def Hexagon_M2_mpyud_ll_s0:
+ di_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyud_ll_s0>;
+def Hexagon_M2_mpyud_ll_s1:
+ di_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyud_ll_s1>;
+
+//Rd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyu_acc_hh_s0:
+ si_MInst_sisisi_acc_hh <"mpyu", int_hexagon_M2_mpyu_acc_hh_s0>;
+def Hexagon_M2_mpyu_acc_hh_s1:
+ si_MInst_sisisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hh_s1>;
+def Hexagon_M2_mpyu_acc_hl_s0:
+ si_MInst_sisisi_acc_hl <"mpyu", int_hexagon_M2_mpyu_acc_hl_s0>;
+def Hexagon_M2_mpyu_acc_hl_s1:
+ si_MInst_sisisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hl_s1>;
+def Hexagon_M2_mpyu_acc_lh_s0:
+ si_MInst_sisisi_acc_lh <"mpyu", int_hexagon_M2_mpyu_acc_lh_s0>;
+def Hexagon_M2_mpyu_acc_lh_s1:
+ si_MInst_sisisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_lh_s1>;
+def Hexagon_M2_mpyu_acc_ll_s0:
+ si_MInst_sisisi_acc_ll <"mpyu", int_hexagon_M2_mpyu_acc_ll_s0>;
+def Hexagon_M2_mpyu_acc_ll_s1:
+ si_MInst_sisisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyu_acc_ll_s1>;
+
+//Rd-=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyu_nac_hh_s0:
+ si_MInst_sisisi_nac_hh <"mpyu", int_hexagon_M2_mpyu_nac_hh_s0>;
+def Hexagon_M2_mpyu_nac_hh_s1:
+ si_MInst_sisisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hh_s1>;
+def Hexagon_M2_mpyu_nac_hl_s0:
+ si_MInst_sisisi_nac_hl <"mpyu", int_hexagon_M2_mpyu_nac_hl_s0>;
+def Hexagon_M2_mpyu_nac_hl_s1:
+ si_MInst_sisisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hl_s1>;
+def Hexagon_M2_mpyu_nac_lh_s0:
+ si_MInst_sisisi_nac_lh <"mpyu", int_hexagon_M2_mpyu_nac_lh_s0>;
+def Hexagon_M2_mpyu_nac_lh_s1:
+ si_MInst_sisisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_lh_s1>;
+def Hexagon_M2_mpyu_nac_ll_s0:
+ si_MInst_sisisi_nac_ll <"mpyu", int_hexagon_M2_mpyu_nac_ll_s0>;
+def Hexagon_M2_mpyu_nac_ll_s1:
+ si_MInst_sisisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyu_nac_ll_s1>;
+
+//Rdd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyud_acc_hh_s0:
+ di_MInst_disisi_acc_hh <"mpyu", int_hexagon_M2_mpyud_acc_hh_s0>;
+def Hexagon_M2_mpyud_acc_hh_s1:
+ di_MInst_disisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hh_s1>;
+def Hexagon_M2_mpyud_acc_hl_s0:
+ di_MInst_disisi_acc_hl <"mpyu", int_hexagon_M2_mpyud_acc_hl_s0>;
+def Hexagon_M2_mpyud_acc_hl_s1:
+ di_MInst_disisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hl_s1>;
+def Hexagon_M2_mpyud_acc_lh_s0:
+ di_MInst_disisi_acc_lh <"mpyu", int_hexagon_M2_mpyud_acc_lh_s0>;
+def Hexagon_M2_mpyud_acc_lh_s1:
+ di_MInst_disisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_lh_s1>;
+def Hexagon_M2_mpyud_acc_ll_s0:
+ di_MInst_disisi_acc_ll <"mpyu", int_hexagon_M2_mpyud_acc_ll_s0>;
+def Hexagon_M2_mpyud_acc_ll_s1:
+ di_MInst_disisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyud_acc_ll_s1>;
+
+//Rdd-=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyud_nac_hh_s0:
+ di_MInst_disisi_nac_hh <"mpyu", int_hexagon_M2_mpyud_nac_hh_s0>;
+def Hexagon_M2_mpyud_nac_hh_s1:
+ di_MInst_disisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hh_s1>;
+def Hexagon_M2_mpyud_nac_hl_s0:
+ di_MInst_disisi_nac_hl <"mpyu", int_hexagon_M2_mpyud_nac_hl_s0>;
+def Hexagon_M2_mpyud_nac_hl_s1:
+ di_MInst_disisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hl_s1>;
+def Hexagon_M2_mpyud_nac_lh_s0:
+ di_MInst_disisi_nac_lh <"mpyu", int_hexagon_M2_mpyud_nac_lh_s0>;
+def Hexagon_M2_mpyud_nac_lh_s1:
+ di_MInst_disisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_lh_s1>;
+def Hexagon_M2_mpyud_nac_ll_s0:
+ di_MInst_disisi_nac_ll <"mpyu", int_hexagon_M2_mpyud_nac_ll_s0>;
+def Hexagon_M2_mpyud_nac_ll_s1:
+ di_MInst_disisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyud_nac_ll_s1>;
+
+
+/********************************************************************
+* MTYPE/VB *
+*********************************************************************/
+
+// MTYPE / VB / Vector reduce add unsigned bytes.
+def Hexagon_A2_vraddub:
+ di_MInst_didi <"vraddub", int_hexagon_A2_vraddub>;
+def Hexagon_A2_vraddub_acc:
+ di_MInst_dididi_acc <"vraddub", int_hexagon_A2_vraddub_acc>;
+
+// MTYPE / VB / Vector sum of absolute differences unsigned bytes.
+def Hexagon_A2_vrsadub:
+ di_MInst_didi <"vrsadub", int_hexagon_A2_vrsadub>;
+def Hexagon_A2_vrsadub_acc:
+ di_MInst_dididi_acc <"vrsadub", int_hexagon_A2_vrsadub_acc>;
+
+/********************************************************************
+* MTYPE/VH *
+*********************************************************************/
+
+// MTYPE / VH / Vector dual multiply.
+def Hexagon_M2_vdmpys_s1:
+ di_MInst_didi_s1_sat <"vdmpy", int_hexagon_M2_vdmpys_s1>;
+def Hexagon_M2_vdmpys_s0:
+ di_MInst_didi_sat <"vdmpy", int_hexagon_M2_vdmpys_s0>;
+def Hexagon_M2_vdmacs_s1:
+ di_MInst_dididi_acc_s1_sat <"vdmpy", int_hexagon_M2_vdmacs_s1>;
+def Hexagon_M2_vdmacs_s0:
+ di_MInst_dididi_acc_sat <"vdmpy", int_hexagon_M2_vdmacs_s0>;
+
+// MTYPE / VH / Vector dual multiply with round and pack.
+def Hexagon_M2_vdmpyrs_s0:
+ si_MInst_didi_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s0>;
+def Hexagon_M2_vdmpyrs_s1:
+ si_MInst_didi_s1_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s1>;
+
+// MTYPE / VH / Vector multiply even halfwords.
+def Hexagon_M2_vmpy2es_s1:
+ di_MInst_didi_s1_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s1>;
+def Hexagon_M2_vmpy2es_s0:
+ di_MInst_didi_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s0>;
+def Hexagon_M2_vmac2es:
+ di_MInst_dididi_acc <"vmpyeh", int_hexagon_M2_vmac2es>;
+def Hexagon_M2_vmac2es_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpyeh", int_hexagon_M2_vmac2es_s1>;
+def Hexagon_M2_vmac2es_s0:
+ di_MInst_dididi_acc_sat <"vmpyeh", int_hexagon_M2_vmac2es_s0>;
+
+// MTYPE / VH / Vector multiply halfwords.
+def Hexagon_M2_vmpy2s_s0:
+ di_MInst_sisi_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0>;
+def Hexagon_M2_vmpy2s_s1:
+ di_MInst_sisi_s1_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1>;
+def Hexagon_M2_vmac2:
+ di_MInst_disisi_acc <"vmpyh", int_hexagon_M2_vmac2>;
+def Hexagon_M2_vmac2s_s0:
+ di_MInst_disisi_acc_sat <"vmpyh", int_hexagon_M2_vmac2s_s0>;
+def Hexagon_M2_vmac2s_s1:
+ di_MInst_disisi_acc_s1_sat <"vmpyh", int_hexagon_M2_vmac2s_s1>;
+
+// MTYPE / VH / Vector multiply halfwords with round and pack.
+def Hexagon_M2_vmpy2s_s0pack:
+ si_MInst_sisi_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0pack>;
+def Hexagon_M2_vmpy2s_s1pack:
+ si_MInst_sisi_s1_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1pack>;
+
+// MTYPE / VH / Vector reduce multiply halfwords.
+// Rxx32+=vrmpyh(Rss32,Rtt32)
+def Hexagon_M2_vrmpy_s0:
+ di_MInst_didi <"vrmpyh", int_hexagon_M2_vrmpy_s0>;
+def Hexagon_M2_vrmac_s0:
+ di_MInst_dididi_acc <"vrmpyh", int_hexagon_M2_vrmac_s0>;
+
+
+/********************************************************************
+* STYPE/ALU *
+*********************************************************************/
+
+// STYPE / ALU / Absolute value.
+def Hexagon_A2_abs:
+ si_SInst_si <"abs", int_hexagon_A2_abs>;
+def Hexagon_A2_absp:
+ di_SInst_di <"abs", int_hexagon_A2_absp>;
+def Hexagon_A2_abssat:
+ si_SInst_si_sat <"abs", int_hexagon_A2_abssat>;
+
+// STYPE / ALU / Negate.
+def Hexagon_A2_negp:
+ di_SInst_di <"neg", int_hexagon_A2_negp>;
+def Hexagon_A2_negsat:
+ si_SInst_si_sat <"neg", int_hexagon_A2_negsat>;
+
+// STYPE / ALU / Logical Not.
+def Hexagon_A2_notp:
+ di_SInst_di <"not", int_hexagon_A2_notp>;
+
+// STYPE / ALU / Sign extend word to doubleword.
+def Hexagon_A2_sxtw:
+ di_SInst_si <"sxtw", int_hexagon_A2_sxtw>;
+
+
+/********************************************************************
+* STYPE/BIT *
+*********************************************************************/
+
+// STYPE / BIT / Count leading.
+def Hexagon_S2_cl0:
+ si_SInst_si <"cl0", int_hexagon_S2_cl0>;
+def Hexagon_S2_cl0p:
+ si_SInst_di <"cl0", int_hexagon_S2_cl0p>;
+def Hexagon_S2_cl1:
+ si_SInst_si <"cl1", int_hexagon_S2_cl1>;
+def Hexagon_S2_cl1p:
+ si_SInst_di <"cl1", int_hexagon_S2_cl1p>;
+def Hexagon_S2_clb:
+ si_SInst_si <"clb", int_hexagon_S2_clb>;
+def Hexagon_S2_clbp:
+ si_SInst_di <"clb", int_hexagon_S2_clbp>;
+def Hexagon_S2_clbnorm:
+ si_SInst_si <"normamt", int_hexagon_S2_clbnorm>;
+
+// STYPE / BIT / Count trailing.
+def Hexagon_S2_ct0:
+ si_SInst_si <"ct0", int_hexagon_S2_ct0>;
+def Hexagon_S2_ct1:
+ si_SInst_si <"ct1", int_hexagon_S2_ct1>;
+
+// STYPE / BIT / Compare bit mask.
+def HEXAGON_C2_bitsclr:
+ qi_SInst_sisi <"bitsclr", int_hexagon_C2_bitsclr>;
+def HEXAGON_C2_bitsclri:
+ qi_SInst_siu6 <"bitsclr", int_hexagon_C2_bitsclri>;
+def HEXAGON_C2_bitsset:
+ qi_SInst_sisi <"bitsset", int_hexagon_C2_bitsset>;
+
+// STYPE / BIT / Extract unsigned.
+// Rd[d][32/64]=extractu(Rs[s],Rt[t],[imm])
+def Hexagon_S2_extractu:
+ si_SInst_siu5u5 <"extractu",int_hexagon_S2_extractu>;
+def Hexagon_S2_extractu_rp:
+ si_SInst_sidi <"extractu",int_hexagon_S2_extractu_rp>;
+def Hexagon_S2_extractup:
+ di_SInst_diu6u6 <"extractu",int_hexagon_S2_extractup>;
+def Hexagon_S2_extractup_rp:
+ di_SInst_didi <"extractu",int_hexagon_S2_extractup_rp>;
+
+// STYPE / BIT / Insert bitfield.
+def HEXAGON_S2_insert:
+ si_SInst_sisiu5u5 <"insert", int_hexagon_S2_insert>;
+def HEXAGON_S2_insert_rp:
+ si_SInst_sisidi <"insert", int_hexagon_S2_insert_rp>;
+def HEXAGON_S2_insertp:
+ di_SInst_didiu6u6 <"insert", int_hexagon_S2_insertp>;
+def HEXAGON_S2_insertp_rp:
+ di_SInst_dididi <"insert", int_hexagon_S2_insertp_rp>;
+
+// STYPE / BIT / Interleave/deinterleave.
+def HEXAGON_S2_interleave:
+ di_SInst_di <"interleave", int_hexagon_S2_interleave>;
+def HEXAGON_S2_deinterleave:
+ di_SInst_di <"deinterleave", int_hexagon_S2_deinterleave>;
+
+// STYPE / BIT / Linear feedback-shift Iteration.
+def HEXAGON_S2_lfsp:
+ di_SInst_didi <"lfs", int_hexagon_S2_lfsp>;
+
+// STYPE / BIT / Bit reverse.
+def HEXAGON_S2_brev:
+ si_SInst_si <"brev", int_hexagon_S2_brev>;
+
+// STYPE / BIT / Set/Clear/Toggle Bit.
+def Hexagon_S2_setbit_i:
+ si_SInst_siu5 <"setbit", int_hexagon_S2_setbit_i>;
+def Hexagon_S2_togglebit_i:
+ si_SInst_siu5 <"togglebit", int_hexagon_S2_togglebit_i>;
+def Hexagon_S2_clrbit_i:
+ si_SInst_siu5 <"clrbit", int_hexagon_S2_clrbit_i>;
+def Hexagon_S2_setbit_r:
+ si_SInst_sisi <"setbit", int_hexagon_S2_setbit_r>;
+def Hexagon_S2_togglebit_r:
+ si_SInst_sisi <"togglebit", int_hexagon_S2_togglebit_r>;
+def Hexagon_S2_clrbit_r:
+ si_SInst_sisi <"clrbit", int_hexagon_S2_clrbit_r>;
+
+// STYPE / BIT / Test Bit.
+def Hexagon_S2_tstbit_i:
+ qi_SInst_siu5 <"tstbit", int_hexagon_S2_tstbit_i>;
+def Hexagon_S2_tstbit_r:
+ qi_SInst_sisi <"tstbit", int_hexagon_S2_tstbit_r>;
+
+
+/********************************************************************
+* STYPE/COMPLEX *
+*********************************************************************/
+
+// STYPE / COMPLEX / Vector Complex conjugate.
+def Hexagon_A2_vconj:
+ di_SInst_di_sat <"vconj", int_hexagon_A2_vconj>;
+
+// STYPE / COMPLEX / Vector Complex rotate.
+def Hexagon_S2_vcrotate:
+ di_SInst_disi <"vcrotate",int_hexagon_S2_vcrotate>;
+
+
+/********************************************************************
+* STYPE/PERM *
+*********************************************************************/
+
+// STYPE / PERM / Saturate.
+def Hexagon_A2_sat:
+ si_SInst_di <"sat", int_hexagon_A2_sat>;
+def Hexagon_A2_satb:
+ si_SInst_si <"satb", int_hexagon_A2_satb>;
+def Hexagon_A2_sath:
+ si_SInst_si <"sath", int_hexagon_A2_sath>;
+def Hexagon_A2_satub:
+ si_SInst_si <"satub", int_hexagon_A2_satub>;
+def Hexagon_A2_satuh:
+ si_SInst_si <"satuh", int_hexagon_A2_satuh>;
+
+// STYPE / PERM / Swizzle bytes.
+def Hexagon_A2_swiz:
+ si_SInst_si <"swiz", int_hexagon_A2_swiz>;
+
+// STYPE / PERM / Vector align.
+// Need custom lowering
+def Hexagon_S2_valignib:
+ di_SInst_didiu3 <"valignb", int_hexagon_S2_valignib>;
+def Hexagon_S2_valignrb:
+ di_SInst_didiqi <"valignb", int_hexagon_S2_valignrb>;
+
+// STYPE / PERM / Vector round and pack.
+def Hexagon_S2_vrndpackwh:
+ si_SInst_di <"vrndwh", int_hexagon_S2_vrndpackwh>;
+def Hexagon_S2_vrndpackwhs:
+ si_SInst_di_sat <"vrndwh", int_hexagon_S2_vrndpackwhs>;
+
+// STYPE / PERM / Vector saturate and pack.
+def Hexagon_S2_svsathb:
+ si_SInst_si <"vsathb", int_hexagon_S2_svsathb>;
+def Hexagon_S2_vsathb:
+ si_SInst_di <"vsathb", int_hexagon_S2_vsathb>;
+def Hexagon_S2_svsathub:
+ si_SInst_si <"vsathub", int_hexagon_S2_svsathub>;
+def Hexagon_S2_vsathub:
+ si_SInst_di <"vsathub", int_hexagon_S2_vsathub>;
+def Hexagon_S2_vsatwh:
+ si_SInst_di <"vsatwh", int_hexagon_S2_vsatwh>;
+def Hexagon_S2_vsatwuh:
+ si_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh>;
+
+// STYPE / PERM / Vector saturate without pack.
+def Hexagon_S2_vsathb_nopack:
+ di_SInst_di <"vsathb", int_hexagon_S2_vsathb_nopack>;
+def Hexagon_S2_vsathub_nopack:
+ di_SInst_di <"vsathub", int_hexagon_S2_vsathub_nopack>;
+def Hexagon_S2_vsatwh_nopack:
+ di_SInst_di <"vsatwh", int_hexagon_S2_vsatwh_nopack>;
+def Hexagon_S2_vsatwuh_nopack:
+ di_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh_nopack>;
+
+// STYPE / PERM / Vector shuffle.
+def Hexagon_S2_shuffeb:
+ di_SInst_didi <"shuffeb", int_hexagon_S2_shuffeb>;
+def Hexagon_S2_shuffeh:
+ di_SInst_didi <"shuffeh", int_hexagon_S2_shuffeh>;
+def Hexagon_S2_shuffob:
+ di_SInst_didi <"shuffob", int_hexagon_S2_shuffob>;
+def Hexagon_S2_shuffoh:
+ di_SInst_didi <"shuffoh", int_hexagon_S2_shuffoh>;
+
+// STYPE / PERM / Vector splat bytes.
+def Hexagon_S2_vsplatrb:
+ si_SInst_si <"vsplatb", int_hexagon_S2_vsplatrb>;
+
+// STYPE / PERM / Vector splat halfwords.
+def Hexagon_S2_vsplatrh:
+ di_SInst_si <"vsplath", int_hexagon_S2_vsplatrh>;
+
+// STYPE / PERM / Vector splice.
+def HEXAGON_S2_vsplicerb:
+ di_SInst_didiqi <"vspliceb",int_hexagon_S2_vsplicerb>;
+def HEXAGON_S2_vspliceib:
+ di_SInst_didiu3 <"vspliceb",int_hexagon_S2_vspliceib>;
+
+// STYPE / PERM / Sign extend.
+def Hexagon_S2_vsxtbh:
+ di_SInst_si <"vsxtbh", int_hexagon_S2_vsxtbh>;
+def Hexagon_S2_vsxthw:
+ di_SInst_si <"vsxthw", int_hexagon_S2_vsxthw>;
+
+// STYPE / PERM / Truncate.
+def Hexagon_S2_vtrunehb:
+ si_SInst_di <"vtrunehb",int_hexagon_S2_vtrunehb>;
+def Hexagon_S2_vtrunohb:
+ si_SInst_di <"vtrunohb",int_hexagon_S2_vtrunohb>;
+def Hexagon_S2_vtrunewh:
+ di_SInst_didi <"vtrunewh",int_hexagon_S2_vtrunewh>;
+def Hexagon_S2_vtrunowh:
+ di_SInst_didi <"vtrunowh",int_hexagon_S2_vtrunowh>;
+
+// STYPE / PERM / Zero extend.
+def Hexagon_S2_vzxtbh:
+ di_SInst_si <"vzxtbh", int_hexagon_S2_vzxtbh>;
+def Hexagon_S2_vzxthw:
+ di_SInst_si <"vzxthw", int_hexagon_S2_vzxthw>;
+
+
+/********************************************************************
+* STYPE/PRED *
+*********************************************************************/
+
+// STYPE / PRED / Mask generate from predicate.
+def Hexagon_C2_mask:
+ di_SInst_qi <"mask", int_hexagon_C2_mask>;
+
+// STYPE / PRED / Predicate transfer.
+def Hexagon_C2_tfrpr:
+ si_SInst_qi <"", int_hexagon_C2_tfrpr>;
+def Hexagon_C2_tfrrp:
+ qi_SInst_si <"", int_hexagon_C2_tfrrp>;
+
+// STYPE / PRED / Viterbi pack even and odd predicate bits.
+def Hexagon_C2_vitpack:
+ si_SInst_qiqi <"vitpack",int_hexagon_C2_vitpack>;
+
+
+/********************************************************************
+* STYPE/SHIFT *
+*********************************************************************/
+
+// STYPE / SHIFT / Shift by immediate.
+def Hexagon_S2_asl_i_r:
+ si_SInst_siu5 <"asl", int_hexagon_S2_asl_i_r>;
+def Hexagon_S2_asr_i_r:
+ si_SInst_siu5 <"asr", int_hexagon_S2_asr_i_r>;
+def Hexagon_S2_lsr_i_r:
+ si_SInst_siu5 <"lsr", int_hexagon_S2_lsr_i_r>;
+def Hexagon_S2_asl_i_p:
+ di_SInst_diu6 <"asl", int_hexagon_S2_asl_i_p>;
+def Hexagon_S2_asr_i_p:
+ di_SInst_diu6 <"asr", int_hexagon_S2_asr_i_p>;
+def Hexagon_S2_lsr_i_p:
+ di_SInst_diu6 <"lsr", int_hexagon_S2_lsr_i_p>;
+
+// STYPE / SHIFT / Shift by immediate and accumulate.
+def Hexagon_S2_asl_i_r_acc:
+ si_SInst_sisiu5_acc <"asl", int_hexagon_S2_asl_i_r_acc>;
+def Hexagon_S2_asr_i_r_acc:
+ si_SInst_sisiu5_acc <"asr", int_hexagon_S2_asr_i_r_acc>;
+def Hexagon_S2_lsr_i_r_acc:
+ si_SInst_sisiu5_acc <"lsr", int_hexagon_S2_lsr_i_r_acc>;
+def Hexagon_S2_asl_i_r_nac:
+ si_SInst_sisiu5_nac <"asl", int_hexagon_S2_asl_i_r_nac>;
+def Hexagon_S2_asr_i_r_nac:
+ si_SInst_sisiu5_nac <"asr", int_hexagon_S2_asr_i_r_nac>;
+def Hexagon_S2_lsr_i_r_nac:
+ si_SInst_sisiu5_nac <"lsr", int_hexagon_S2_lsr_i_r_nac>;
+def Hexagon_S2_asl_i_p_acc:
+ di_SInst_didiu6_acc <"asl", int_hexagon_S2_asl_i_p_acc>;
+def Hexagon_S2_asr_i_p_acc:
+ di_SInst_didiu6_acc <"asr", int_hexagon_S2_asr_i_p_acc>;
+def Hexagon_S2_lsr_i_p_acc:
+ di_SInst_didiu6_acc <"lsr", int_hexagon_S2_lsr_i_p_acc>;
+def Hexagon_S2_asl_i_p_nac:
+ di_SInst_didiu6_nac <"asl", int_hexagon_S2_asl_i_p_nac>;
+def Hexagon_S2_asr_i_p_nac:
+ di_SInst_didiu6_nac <"asr", int_hexagon_S2_asr_i_p_nac>;
+def Hexagon_S2_lsr_i_p_nac:
+ di_SInst_didiu6_nac <"lsr", int_hexagon_S2_lsr_i_p_nac>;
+
+// STYPE / SHIFT / Shift by immediate and add.
+def Hexagon_S2_addasl_rrri:
+ si_SInst_sisiu3 <"addasl", int_hexagon_S2_addasl_rrri>;
+
+// STYPE / SHIFT / Shift by immediate and logical.
+def Hexagon_S2_asl_i_r_and:
+ si_SInst_sisiu5_and <"asl", int_hexagon_S2_asl_i_r_and>;
+def Hexagon_S2_asr_i_r_and:
+ si_SInst_sisiu5_and <"asr", int_hexagon_S2_asr_i_r_and>;
+def Hexagon_S2_lsr_i_r_and:
+ si_SInst_sisiu5_and <"lsr", int_hexagon_S2_lsr_i_r_and>;
+
+def Hexagon_S2_asl_i_r_xacc:
+ si_SInst_sisiu5_xor <"asl", int_hexagon_S2_asl_i_r_xacc>;
+def Hexagon_S2_lsr_i_r_xacc:
+ si_SInst_sisiu5_xor <"lsr", int_hexagon_S2_lsr_i_r_xacc>;
+
+def Hexagon_S2_asl_i_r_or:
+ si_SInst_sisiu5_or <"asl", int_hexagon_S2_asl_i_r_or>;
+def Hexagon_S2_asr_i_r_or:
+ si_SInst_sisiu5_or <"asr", int_hexagon_S2_asr_i_r_or>;
+def Hexagon_S2_lsr_i_r_or:
+ si_SInst_sisiu5_or <"lsr", int_hexagon_S2_lsr_i_r_or>;
+
+def Hexagon_S2_asl_i_p_and:
+ di_SInst_didiu6_and <"asl", int_hexagon_S2_asl_i_p_and>;
+def Hexagon_S2_asr_i_p_and:
+ di_SInst_didiu6_and <"asr", int_hexagon_S2_asr_i_p_and>;
+def Hexagon_S2_lsr_i_p_and:
+ di_SInst_didiu6_and <"lsr", int_hexagon_S2_lsr_i_p_and>;
+
+def Hexagon_S2_asl_i_p_xacc:
+ di_SInst_didiu6_xor <"asl", int_hexagon_S2_asl_i_p_xacc>;
+def Hexagon_S2_lsr_i_p_xacc:
+ di_SInst_didiu6_xor <"lsr", int_hexagon_S2_lsr_i_p_xacc>;
+
+def Hexagon_S2_asl_i_p_or:
+ di_SInst_didiu6_or <"asl", int_hexagon_S2_asl_i_p_or>;
+def Hexagon_S2_asr_i_p_or:
+ di_SInst_didiu6_or <"asr", int_hexagon_S2_asr_i_p_or>;
+def Hexagon_S2_lsr_i_p_or:
+ di_SInst_didiu6_or <"lsr", int_hexagon_S2_lsr_i_p_or>;
+
+// STYPE / SHIFT / Shift right by immediate with rounding.
+def Hexagon_S2_asr_i_r_rnd:
+ si_SInst_siu5_rnd <"asr", int_hexagon_S2_asr_i_r_rnd>;
+def Hexagon_S2_asr_i_r_rnd_goodsyntax:
+ si_SInst_siu5 <"asrrnd", int_hexagon_S2_asr_i_r_rnd_goodsyntax>;
+
+// STYPE / SHIFT / Shift left by immediate with saturation.
+def Hexagon_S2_asl_i_r_sat:
+ si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_i_r_sat>;
+
+// STYPE / SHIFT / Shift by register.
+def Hexagon_S2_asl_r_r:
+ si_SInst_sisi <"asl", int_hexagon_S2_asl_r_r>;
+def Hexagon_S2_asr_r_r:
+ si_SInst_sisi <"asr", int_hexagon_S2_asr_r_r>;
+def Hexagon_S2_lsl_r_r:
+ si_SInst_sisi <"lsl", int_hexagon_S2_lsl_r_r>;
+def Hexagon_S2_lsr_r_r:
+ si_SInst_sisi <"lsr", int_hexagon_S2_lsr_r_r>;
+def Hexagon_S2_asl_r_p:
+ di_SInst_disi <"asl", int_hexagon_S2_asl_r_p>;
+def Hexagon_S2_asr_r_p:
+ di_SInst_disi <"asr", int_hexagon_S2_asr_r_p>;
+def Hexagon_S2_lsl_r_p:
+ di_SInst_disi <"lsl", int_hexagon_S2_lsl_r_p>;
+def Hexagon_S2_lsr_r_p:
+ di_SInst_disi <"lsr", int_hexagon_S2_lsr_r_p>;
+
+// STYPE / SHIFT / Shift by register and accumulate.
+def Hexagon_S2_asl_r_r_acc:
+ si_SInst_sisisi_acc <"asl", int_hexagon_S2_asl_r_r_acc>;
+def Hexagon_S2_asr_r_r_acc:
+ si_SInst_sisisi_acc <"asr", int_hexagon_S2_asr_r_r_acc>;
+def Hexagon_S2_lsl_r_r_acc:
+ si_SInst_sisisi_acc <"lsl", int_hexagon_S2_lsl_r_r_acc>;
+def Hexagon_S2_lsr_r_r_acc:
+ si_SInst_sisisi_acc <"lsr", int_hexagon_S2_lsr_r_r_acc>;
+def Hexagon_S2_asl_r_p_acc:
+ di_SInst_didisi_acc <"asl", int_hexagon_S2_asl_r_p_acc>;
+def Hexagon_S2_asr_r_p_acc:
+ di_SInst_didisi_acc <"asr", int_hexagon_S2_asr_r_p_acc>;
+def Hexagon_S2_lsl_r_p_acc:
+ di_SInst_didisi_acc <"lsl", int_hexagon_S2_lsl_r_p_acc>;
+def Hexagon_S2_lsr_r_p_acc:
+ di_SInst_didisi_acc <"lsr", int_hexagon_S2_lsr_r_p_acc>;
+
+def Hexagon_S2_asl_r_r_nac:
+ si_SInst_sisisi_nac <"asl", int_hexagon_S2_asl_r_r_nac>;
+def Hexagon_S2_asr_r_r_nac:
+ si_SInst_sisisi_nac <"asr", int_hexagon_S2_asr_r_r_nac>;
+def Hexagon_S2_lsl_r_r_nac:
+ si_SInst_sisisi_nac <"lsl", int_hexagon_S2_lsl_r_r_nac>;
+def Hexagon_S2_lsr_r_r_nac:
+ si_SInst_sisisi_nac <"lsr", int_hexagon_S2_lsr_r_r_nac>;
+def Hexagon_S2_asl_r_p_nac:
+ di_SInst_didisi_nac <"asl", int_hexagon_S2_asl_r_p_nac>;
+def Hexagon_S2_asr_r_p_nac:
+ di_SInst_didisi_nac <"asr", int_hexagon_S2_asr_r_p_nac>;
+def Hexagon_S2_lsl_r_p_nac:
+ di_SInst_didisi_nac <"lsl", int_hexagon_S2_lsl_r_p_nac>;
+def Hexagon_S2_lsr_r_p_nac:
+ di_SInst_didisi_nac <"lsr", int_hexagon_S2_lsr_r_p_nac>;
+
+// STYPE / SHIFT / Shift by register and logical.
+def Hexagon_S2_asl_r_r_and:
+ si_SInst_sisisi_and <"asl", int_hexagon_S2_asl_r_r_and>;
+def Hexagon_S2_asr_r_r_and:
+ si_SInst_sisisi_and <"asr", int_hexagon_S2_asr_r_r_and>;
+def Hexagon_S2_lsl_r_r_and:
+ si_SInst_sisisi_and <"lsl", int_hexagon_S2_lsl_r_r_and>;
+def Hexagon_S2_lsr_r_r_and:
+ si_SInst_sisisi_and <"lsr", int_hexagon_S2_lsr_r_r_and>;
+
+def Hexagon_S2_asl_r_r_or:
+ si_SInst_sisisi_or <"asl", int_hexagon_S2_asl_r_r_or>;
+def Hexagon_S2_asr_r_r_or:
+ si_SInst_sisisi_or <"asr", int_hexagon_S2_asr_r_r_or>;
+def Hexagon_S2_lsl_r_r_or:
+ si_SInst_sisisi_or <"lsl", int_hexagon_S2_lsl_r_r_or>;
+def Hexagon_S2_lsr_r_r_or:
+ si_SInst_sisisi_or <"lsr", int_hexagon_S2_lsr_r_r_or>;
+
+def Hexagon_S2_asl_r_p_and:
+ di_SInst_didisi_and <"asl", int_hexagon_S2_asl_r_p_and>;
+def Hexagon_S2_asr_r_p_and:
+ di_SInst_didisi_and <"asr", int_hexagon_S2_asr_r_p_and>;
+def Hexagon_S2_lsl_r_p_and:
+ di_SInst_didisi_and <"lsl", int_hexagon_S2_lsl_r_p_and>;
+def Hexagon_S2_lsr_r_p_and:
+ di_SInst_didisi_and <"lsr", int_hexagon_S2_lsr_r_p_and>;
+
+def Hexagon_S2_asl_r_p_or:
+ di_SInst_didisi_or <"asl", int_hexagon_S2_asl_r_p_or>;
+def Hexagon_S2_asr_r_p_or:
+ di_SInst_didisi_or <"asr", int_hexagon_S2_asr_r_p_or>;
+def Hexagon_S2_lsl_r_p_or:
+ di_SInst_didisi_or <"lsl", int_hexagon_S2_lsl_r_p_or>;
+def Hexagon_S2_lsr_r_p_or:
+ di_SInst_didisi_or <"lsr", int_hexagon_S2_lsr_r_p_or>;
+
+// STYPE / SHIFT / Shift by register with saturation.
+def Hexagon_S2_asl_r_r_sat:
+ si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_r_r_sat>;
+def Hexagon_S2_asr_r_r_sat:
+ si_SInst_sisi_sat <"asr", int_hexagon_S2_asr_r_r_sat>;
+
+// STYPE / SHIFT / Table Index.
+def HEXAGON_S2_tableidxb_goodsyntax:
+ si_MInst_sisiu4u5 <"tableidxb",int_hexagon_S2_tableidxb_goodsyntax>;
+def HEXAGON_S2_tableidxd_goodsyntax:
+ si_MInst_sisiu4u5 <"tableidxd",int_hexagon_S2_tableidxd_goodsyntax>;
+def HEXAGON_S2_tableidxh_goodsyntax:
+ si_MInst_sisiu4u5 <"tableidxh",int_hexagon_S2_tableidxh_goodsyntax>;
+def HEXAGON_S2_tableidxw_goodsyntax:
+ si_MInst_sisiu4u5 <"tableidxw",int_hexagon_S2_tableidxw_goodsyntax>;
+
+
+/********************************************************************
+* STYPE/VH *
+*********************************************************************/
+
+// STYPE / VH / Vector absolute value halfwords.
+// Rdd64=vabsh(Rss64)
+def Hexagon_A2_vabsh:
+ di_SInst_di <"vabsh", int_hexagon_A2_vabsh>;
+def Hexagon_A2_vabshsat:
+ di_SInst_di_sat <"vabsh", int_hexagon_A2_vabshsat>;
+
+// STYPE / VH / Vector shift halfwords by immediate.
+// Rdd64=v[asl/asr/lsr]h(Rss64,Rt32)
+def Hexagon_S2_asl_i_vh:
+ di_SInst_disi <"vaslh", int_hexagon_S2_asl_i_vh>;
+def Hexagon_S2_asr_i_vh:
+ di_SInst_disi <"vasrh", int_hexagon_S2_asr_i_vh>;
+def Hexagon_S2_lsr_i_vh:
+ di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_i_vh>;
+
+// STYPE / VH / Vector shift halfwords by register.
+// Rdd64=v[asl/asr/lsl/lsr]h(Rss64,Rt32)
+def Hexagon_S2_asl_r_vh:
+ di_SInst_disi <"vaslh", int_hexagon_S2_asl_r_vh>;
+def Hexagon_S2_asr_r_vh:
+ di_SInst_disi <"vasrh", int_hexagon_S2_asr_r_vh>;
+def Hexagon_S2_lsl_r_vh:
+ di_SInst_disi <"vlslh", int_hexagon_S2_lsl_r_vh>;
+def Hexagon_S2_lsr_r_vh:
+ di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_r_vh>;
+
+
+/********************************************************************
+* STYPE/VW *
+*********************************************************************/
+
+// STYPE / VW / Vector absolute value words.
+def Hexagon_A2_vabsw:
+ di_SInst_di <"vabsw", int_hexagon_A2_vabsw>;
+def Hexagon_A2_vabswsat:
+ di_SInst_di_sat <"vabsw", int_hexagon_A2_vabswsat>;
+
+// STYPE / VW / Vector shift words by immediate.
+// Rdd64=v[asl/asr/lsr]w(Rss64,Rt32)
+def Hexagon_S2_asl_i_vw:
+ di_SInst_disi <"vaslw", int_hexagon_S2_asl_i_vw>;
+def Hexagon_S2_asr_i_vw:
+ di_SInst_disi <"vasrw", int_hexagon_S2_asr_i_vw>;
+def Hexagon_S2_lsr_i_vw:
+ di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_i_vw>;
+
+// STYPE / VW / Vector shift words by register.
+// Rdd64=v[asl/asr/lsl/lsr]w(Rss64,Rt32)
+def Hexagon_S2_asl_r_vw:
+ di_SInst_disi <"vaslw", int_hexagon_S2_asl_r_vw>;
+def Hexagon_S2_asr_r_vw:
+ di_SInst_disi <"vasrw", int_hexagon_S2_asr_r_vw>;
+def Hexagon_S2_lsl_r_vw:
+ di_SInst_disi <"vlslw", int_hexagon_S2_lsl_r_vw>;
+def Hexagon_S2_lsr_r_vw:
+ di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_r_vw>;
+
+// STYPE / VW / Vector shift words with truncate and pack.
+def Hexagon_S2_asr_r_svw_trun:
+ si_SInst_disi <"vasrw", int_hexagon_S2_asr_r_svw_trun>;
+def Hexagon_S2_asr_i_svw_trun:
+ si_SInst_diu5 <"vasrw", int_hexagon_S2_asr_i_svw_trun>;
+
+include "HexagonIntrinsicsV3.td"
+include "HexagonIntrinsicsV4.td"
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsDerived.td b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
new file mode 100644
index 0000000..68eaf68
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
@@ -0,0 +1,29 @@
+//===-- HexagonIntrinsicsDerived.td - Derived intrinsics ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// 64-bit multiply that keeps only the low 64 bits of the product.
+// Low word:  low half of MPYU64(lo($src1), lo($src2)).
+// High word: high half of that MPYU64, passed through two nested
+// Hexagon_M2_maci with (lo($src1), hi($src2)) and (lo($src2), hi($src1)).
+def : Pat <(mul DoubleRegs:$src1, DoubleRegs:$src2),
+ (COMBINE_rr
+ (Hexagon_M2_maci
+ (Hexagon_M2_maci (EXTRACT_SUBREG (MPYU64 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg)),
+ subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)),
+ (EXTRACT_SUBREG (MPYU64 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg)),
+ subreg_loreg))>;
+
+
+
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV3.td b/lib/Target/Hexagon/HexagonIntrinsicsV3.td
new file mode 100644
index 0000000..2a54e62
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV3.td
@@ -0,0 +1,50 @@
+//=- HexagonIntrinsicsV3.td - Target Description for Hexagon -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V3 Compiler Intrinsics in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+
+
+
+// MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary.
+def Hexagon_M2_vrcmpys_s1:
+ di_MInst_disi_s1_sat <"vrcmpys", int_hexagon_M2_vrcmpys_s1>;
+def Hexagon_M2_vrcmpys_acc_s1:
+ di_MInst_didisi_acc_s1_sat <"vrcmpys", int_hexagon_M2_vrcmpys_acc_s1>;
+def Hexagon_M2_vrcmpys_s1rp:
+ si_MInst_disi_s1_rnd_sat <"vrcmpys", int_hexagon_M2_vrcmpys_s1rp>;
+
+
+
+
+/********************************************************************
+* MTYPE/VB *
+*********************************************************************/
+
+// MTYPE / VB / Vector reduce add unsigned halfwords.
+def Hexagon_M2_vradduh:
+ si_MInst_didi <"vradduh", int_hexagon_M2_vradduh>;
+
+
+/********************************************************************
+* ALU64/ALU *
+*********************************************************************/
+
+// ALU64 / ALU / Add.
+def Hexagon_A2_addsp:
+ di_ALU64_sidi <"add", int_hexagon_A2_addsp>;
+def Hexagon_A2_addpsat:
+ di_ALU64_didi <"add", int_hexagon_A2_addpsat>;
+
+def Hexagon_A2_maxp:
+ di_ALU64_didi <"max", int_hexagon_A2_maxp>;
+def Hexagon_A2_maxup:
+ di_ALU64_didi <"maxu", int_hexagon_A2_maxup>;
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/lib/Target/Hexagon/HexagonIntrinsicsV4.td
new file mode 100644
index 0000000..dd28ebb
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV4.td
@@ -0,0 +1,369 @@
+//===- HexagonIntrinsicsV4.td - V4 Instruction intrinsics --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is populated based on the following specs:
+// Hexagon V4 Architecture Extensions
+// Application-Level Specification
+// 80-V9418-12 Rev. A
+// June 15, 2010
+
+
+//
+// ALU 32 types.
+//
+
+// Emits "$dst = opc($src1, ~$src2)": 32-bit register-register form with the
+// second source printed complemented.
+class si_ALU32_sisi_not<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, ~$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Emits "$dst = opc(#$src1, $src2)": s8 immediate first, register second,
+// DoubleRegs result.
+// NOTE(review): this and the other immediate forms below derive from
+// ALU32_rr - confirm that is the intended base class.
+class di_ALU32_s8si<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs DoubleRegs:$dst), (ins s8Imm:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "(#$src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID imm:$src1, IntRegs:$src2))]>;
+
+// Emits "$dst = opc($src1, #$src2)": register first, s8 immediate second,
+// DoubleRegs result.
+class di_ALU32_sis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+// Emits "$dst = !opc($src1, $src2)": negated register-register form writing
+// a predicate register.
+class qi_neg_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Emits "$dst = !opc($src1, #$src2)": negated register / s10 immediate form
+// writing a predicate register.
+class qi_neg_ALU32_sis10<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+// Emits "$dst = !opc($src1, #$src2)": negated register / u9 immediate form
+// writing a predicate register.
+class qi_neg_ALU32_siu9<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u9Imm:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+// Emits "$dst = !opc($src1, $src2)": negated register-register form writing
+// an integer register.
+class si_neg_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Emits "$dst = !opc($src1, #$src2)": negated register / s8 immediate form
+// writing an integer register.
+class si_neg_ALU32_sis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+// Emits "$dst = opc($src1, #$src2)": register / s8 immediate form writing an
+// integer register.
+class si_ALU32_sis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+
+//
+// SInst Classes.
+//
+// Emits "$dst = !opc($src1, $src2)": negated two-operand SInst writing a
+// predicate register.
+// NOTE(review): the name says qi (predicate) sources but the operands are
+// declared as IntRegs - confirm this is intentional.
+class qi_neg_SInst_qiqi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+// Predicate logical-logical form with the last operand negated.
+// Emits "$dst = opc($src1, and($src2, !$src3))". The asm string previously
+// lacked the final ')' and printed unbalanced parentheses.
+class qi_SInst_qi_andqiqi_neg<string opc, Intrinsic IntID>
+  : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+                                     IntRegs:$src3),
+          !strconcat("$dst = ", !strconcat(opc ,
+                                           "($src1, and($src2, !$src3))")),
+          [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+                                IntRegs:$src3))]>;
+
+// Predicate logical-logical form.
+// Emits "$dst = opc($src1, and($src2, $src3))". The asm string previously
+// lacked the final ')' and printed unbalanced parentheses.
+class qi_SInst_qi_andqiqi<string opc, Intrinsic IntID>
+  : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+                                     IntRegs:$src3),
+          !strconcat("$dst = ", !strconcat(opc ,
+                                           "($src1, and($src2, $src3))")),
+          [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+                                IntRegs:$src3))]>;
+
+// Predicate logical-logical form with the last operand negated.
+// Emits "$dst = opc($src1, or($src2, !$src3))". The asm string previously
+// lacked the final ')' and printed unbalanced parentheses.
+class qi_SInst_qi_orqiqi_neg<string opc, Intrinsic IntID>
+  : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+                                     IntRegs:$src3),
+          !strconcat("$dst = ", !strconcat(opc ,
+                                           "($src1, or($src2, !$src3))")),
+          [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+                                IntRegs:$src3))]>;
+
+// Predicate logical-logical form.
+// Emits "$dst = opc($src1, or($src2, $src3))". The asm string previously
+// lacked the final ')' and printed unbalanced parentheses.
+class qi_SInst_qi_orqiqi<string opc, Intrinsic IntID>
+  : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+                                     IntRegs:$src3),
+          !strconcat("$dst = ", !strconcat(opc ,
+                                           "($src1, or($src2, $src3))")),
+          [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+                                IntRegs:$src3))]>;
+
+// Add-and-accumulate form with a register plus s6 immediate inner add.
+// Emits "$dst = opc($src1, add($src2, #$src3))". The asm string previously
+// lacked the final ')' and printed unbalanced parentheses.
+class si_SInst_si_addsis6<string opc, Intrinsic IntID>
+  : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s6Imm:$src3),
+          !strconcat("$dst = ", !strconcat(opc ,
+                                           "($src1, add($src2, #$src3))")),
+          [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+                               imm:$src3))]>;
+
+// Accumulate form with an s6 immediate minus register inner subtract.
+// Emits "$dst = opc($src1, sub(#$src2, $src3))". The asm string previously
+// lacked the final ')' and printed unbalanced parentheses.
+class si_SInst_si_subs6si<string opc, Intrinsic IntID>
+  : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
+          !strconcat("$dst = ", !strconcat(opc ,
+                                           "($src1, sub(#$src2, $src3))")),
+          [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2,
+                               IntRegs:$src3))]>;
+
+// Emits "$dst = opc($src1, ~$src2)": 64-bit register-register form with the
+// second source printed complemented.
+class di_ALU64_didi_neg<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, ~$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src2))]>;
+
+// Emits "$dst ^= opc($src1, $src2)": 64-bit xor-accumulate. The accumulator
+// input $dst2 is tied to the output through the "$dst2 = $dst" constraint.
+class di_MInst_dididi_xacc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+// Emits "$dst &= opc($src2, $src3)": 32-bit and-accumulate.
+// NOTE(review): $dst1 is the accumulator input here and in the *_andn,
+// *_xor, *_xorn, *_or, *_orn and si_SInst_sisis10_or classes below, but,
+// unlike di_MInst_dididi_xacc, no "$dst1 = $dst" constraint is passed -
+// confirm whether the two registers are tied elsewhere.
+class si_MInst_sisisi_and<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst &= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+// Emits "$dst &= opc($src2, ~$src3)": and-accumulate with the last source
+// printed complemented.
+class si_MInst_sisisi_andn<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst &= ", !strconcat(opc , "($src2, ~$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+// Emits "$dst = opc($src1, and($src2, #$src3))": outer op applied to a
+// register and an inner and-with-s10-immediate.
+class si_SInst_sisis10_andi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s10Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, and($src2, #$src3))")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ imm:$src3))]>;
+
+// Emits "$dst ^= opc($src2, $src3)": 32-bit xor-accumulate.
+class si_MInst_sisisi_xor<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+// Emits "$dst ^= opc($src2, ~$src3)": xor-accumulate with the last source
+// printed complemented.
+class si_MInst_sisisi_xorn<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src2, ~$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+// Emits "$dst |= opc($src2, #$src3)": or-accumulate of a register / s10
+// immediate operation.
+class si_SInst_sisis10_or<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, s10Imm:$src3),
+ !strconcat("$dst |= ", !strconcat(opc , "($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ imm:$src3))]>;
+
+// Emits "$dst |= opc($src2, $src3)": 32-bit or-accumulate.
+class si_MInst_sisisi_or<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst |= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+// Emits "$dst |= opc($src2, ~$src3)": or-accumulate with the last source
+// printed complemented.
+class si_MInst_sisisi_orn<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst |= ", !strconcat(opc , "($src2, ~$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+// Emits "$dst = opc($src1, #$src2):sat": register / u5 immediate form with
+// the ":sat" suffix.
+class si_SInst_siu5_sat<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+
+/********************************************************************
+* ALU32/ALU *
+*********************************************************************/
+
+// ALU32 / ALU / Logical Operations.
+def Hexagon_A4_orn : si_ALU32_sisi_not <"or", int_hexagon_A4_orn>;
+def Hexagon_A4_andn : si_ALU32_sisi_not <"and", int_hexagon_A4_andn>;
+
+
+/********************************************************************
+* ALU32/PERM *
+*********************************************************************/
+
+// ALU32 / PERM / Combine Words Into Doublewords.
+def Hexagon_A4_combineir : di_ALU32_s8si <"combine", int_hexagon_A4_combineir>;
+def Hexagon_A4_combineri : di_ALU32_sis8 <"combine", int_hexagon_A4_combineri>;
+
+
+/********************************************************************
+* ALU32/PRED *
+*********************************************************************/
+
+// ALU32 / PRED / Conditional Shift Halfword.
+// ALU32 / PRED / Conditional Sign Extend.
+// ALU32 / PRED / Conditional Zero Extend.
+// ALU32 / PRED / Compare.
+def Hexagon_C4_cmpneq : qi_neg_ALU32_sisi <"cmp.eq", int_hexagon_C4_cmpneq>;
+def Hexagon_C4_cmpneqi : qi_neg_ALU32_sis10 <"cmp.eq", int_hexagon_C4_cmpneqi>;
+def Hexagon_C4_cmplte : qi_neg_ALU32_sisi <"cmp.gt", int_hexagon_C4_cmplte>;
+def Hexagon_C4_cmpltei : qi_neg_ALU32_sis10 <"cmp.gt", int_hexagon_C4_cmpltei>;
+def Hexagon_C4_cmplteu : qi_neg_ALU32_sisi <"cmp.gtu",int_hexagon_C4_cmplteu>;
+def Hexagon_C4_cmplteui: qi_neg_ALU32_siu9 <"cmp.gtu",int_hexagon_C4_cmplteui>;
+
+// ALU32 / PRED / Compare To General Register.
+def Hexagon_A4_rcmpneq : si_neg_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpneq>;
+def Hexagon_A4_rcmpneqi: si_neg_ALU32_sis8 <"cmp.eq", int_hexagon_A4_rcmpneqi>;
+def Hexagon_A4_rcmpeq : si_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpeq>;
+def Hexagon_A4_rcmpeqi : si_ALU32_sis8 <"cmp.eq", int_hexagon_A4_rcmpeqi>;
+
+
+/********************************************************************
+* CR *
+*********************************************************************/
+
+// CR / Corner Detection Acceleration.
+def Hexagon_C4_fastcorner9:
+ qi_SInst_qiqi<"fastcorner9", int_hexagon_C4_fastcorner9>;
+def Hexagon_C4_fastcorner9_not:
+ qi_neg_SInst_qiqi<"fastcorner9",int_hexagon_C4_fastcorner9_not>;
+
+// CR / Logical Operations On Predicates.
+def Hexagon_C4_and_andn:
+ qi_SInst_qi_andqiqi_neg <"and", int_hexagon_C4_and_andn>;
+def Hexagon_C4_and_and:
+ qi_SInst_qi_andqiqi <"and", int_hexagon_C4_and_and>;
+def Hexagon_C4_and_orn:
+ qi_SInst_qi_orqiqi_neg <"and", int_hexagon_C4_and_orn>;
+def Hexagon_C4_and_or:
+ qi_SInst_qi_orqiqi <"and", int_hexagon_C4_and_or>;
+def Hexagon_C4_or_andn:
+ qi_SInst_qi_andqiqi_neg <"or", int_hexagon_C4_or_andn>;
+def Hexagon_C4_or_and:
+ qi_SInst_qi_andqiqi <"or", int_hexagon_C4_or_and>;
+def Hexagon_C4_or_orn:
+ qi_SInst_qi_orqiqi_neg <"or", int_hexagon_C4_or_orn>;
+def Hexagon_C4_or_or:
+ qi_SInst_qi_orqiqi <"or", int_hexagon_C4_or_or>;
+
+
+/********************************************************************
+* XTYPE/ALU *
+*********************************************************************/
+
+// XTYPE / ALU / Add And Accumulate.
+def Hexagon_S4_addaddi:
+ si_SInst_si_addsis6 <"add", int_hexagon_S4_addaddi>;
+def Hexagon_S4_subaddi:
+ si_SInst_si_subs6si <"add", int_hexagon_S4_subaddi>;
+
+// XTYPE / ALU / Logical Doublewords.
+def Hexagon_S4_andnp:
+ di_ALU64_didi_neg <"and", int_hexagon_A4_andnp>;
+def Hexagon_S4_ornp:
+ di_ALU64_didi_neg <"or", int_hexagon_A4_ornp>;
+
+// XTYPE / ALU / Logical-logical Doublewords.
+def Hexagon_M4_xor_xacc:
+ di_MInst_dididi_xacc <"xor", int_hexagon_M4_xor_xacc>;
+
+// XTYPE / ALU / Logical-logical Words.
+def HEXAGON_M4_and_and:
+ si_MInst_sisisi_and <"and", int_hexagon_M4_and_and>;
+def HEXAGON_M4_and_or:
+ si_MInst_sisisi_and <"or", int_hexagon_M4_and_or>;
+def HEXAGON_M4_and_xor:
+ si_MInst_sisisi_and <"xor", int_hexagon_M4_and_xor>;
+def HEXAGON_M4_and_andn:
+ si_MInst_sisisi_andn <"and", int_hexagon_M4_and_andn>;
+def HEXAGON_M4_xor_and:
+ si_MInst_sisisi_xor <"and", int_hexagon_M4_xor_and>;
+def HEXAGON_M4_xor_or:
+ si_MInst_sisisi_xor <"or", int_hexagon_M4_xor_or>;
+def HEXAGON_M4_xor_andn:
+ si_MInst_sisisi_xorn <"and", int_hexagon_M4_xor_andn>;
+def HEXAGON_M4_or_and:
+ si_MInst_sisisi_or <"and", int_hexagon_M4_or_and>;
+def HEXAGON_M4_or_or:
+ si_MInst_sisisi_or <"or", int_hexagon_M4_or_or>;
+def HEXAGON_M4_or_xor:
+ si_MInst_sisisi_or <"xor", int_hexagon_M4_or_xor>;
+def HEXAGON_M4_or_andn:
+ si_MInst_sisisi_orn <"and", int_hexagon_M4_or_andn>;
+def HEXAGON_S4_or_andix:
+ si_SInst_sisis10_andi <"or", int_hexagon_S4_or_andix>;
+def HEXAGON_S4_or_andi:
+ si_SInst_sisis10_or <"and", int_hexagon_S4_or_andi>;
+def HEXAGON_S4_or_ori:
+ si_SInst_sisis10_or <"or", int_hexagon_S4_or_ori>;
+
+// XTYPE / ALU / Modulo wrap.
+def HEXAGON_A4_modwrapu:
+ si_ALU64_sisi <"modwrap", int_hexagon_A4_modwrapu>;
+
+// XTYPE / ALU / Round.
+def HEXAGON_A4_cround_ri:
+ si_SInst_siu5 <"cround", int_hexagon_A4_cround_ri>;
+def HEXAGON_A4_cround_rr:
+ si_SInst_sisi <"cround", int_hexagon_A4_cround_rr>;
+def HEXAGON_A4_round_ri:
+ si_SInst_siu5 <"round", int_hexagon_A4_round_ri>;
+def HEXAGON_A4_round_rr:
+ si_SInst_sisi <"round", int_hexagon_A4_round_rr>;
+def HEXAGON_A4_round_ri_sat:
+ si_SInst_siu5_sat <"round", int_hexagon_A4_round_ri_sat>;
+def HEXAGON_A4_round_rr_sat:
+ si_SInst_sisi_sat <"round", int_hexagon_A4_round_rr_sat>;
+
+// XTYPE / ALU / Vector reduce add unsigned halfwords.
+// XTYPE / ALU / Vector add bytes.
+// XTYPE / ALU / Vector conditional negate.
+// XTYPE / ALU / Vector maximum bytes.
+// XTYPE / ALU / Vector reduce maximum halfwords.
+// XTYPE / ALU / Vector reduce maximum words.
+// XTYPE / ALU / Vector minimum bytes.
+// XTYPE / ALU / Vector reduce minimum halfwords.
+// XTYPE / ALU / Vector reduce minimum words.
+// XTYPE / ALU / Vector subtract bytes.
+
+
+/********************************************************************
+* XTYPE/BIT *
+*********************************************************************/
+
+// XTYPE / BIT / Count leading.
+// XTYPE / BIT / Count trailing.
+// XTYPE / BIT / Extract bitfield.
+// XTYPE / BIT / Masked parity.
+// XTYPE / BIT / Bit reverse.
+// XTYPE / BIT / Split bitfield.
+
+
+/********************************************************************
+* XTYPE/COMPLEX *
+*********************************************************************/
+
+// XTYPE / COMPLEX / Complex add/sub halfwords.
+// XTYPE / COMPLEX / Complex add/sub words.
+// XTYPE / COMPLEX / Complex multiply 32x16.
+// XTYPE / COMPLEX / Vector reduce complex rotate.
+
+
+/********************************************************************
+* XTYPE/MPY *
+*********************************************************************/
+
+// XTYPE / COMPLEX / Complex add/sub halfwords.
diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
new file mode 100644
index 0000000..0318c51
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
@@ -0,0 +1,75 @@
+//=- HexagonMachineFunctionInfo.h - Hexagon machine function info -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonMACHINEFUNCTIONINFO_H
+#define HexagonMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+ // Bit flags OR-ed into the per-instruction entries of
+ // HexagonMachineFunctionInfo::PacketInfo.
+ namespace Hexagon {
+ const unsigned int StartPacket = 0x1;
+ const unsigned int EndPacket = 0x2;
+ }
+
+
+/// Hexagon target-specific information for each MachineFunction.
+///
+/// Holds the sret return register, the recorded alloca-adjust instructions,
+/// the varargs frame index, the "clobbers LR" flag and per-instruction packet
+/// start/end markers.
+class HexagonMachineFunctionInfo : public MachineFunctionInfo {
+  // SRetReturnReg - Some subtargets require that sret lowering includes
+  // returning the value of the returned struct in a register. This field
+  // holds the virtual register into which the sret argument is passed.
+  unsigned SRetReturnReg;
+  std::vector<MachineInstr*> AllocaAdjustInsts;
+  int VarArgsFrameIndex;
+  bool HasClobberLR;
+
+  // Bitmask of Hexagon::StartPacket / Hexagon::EndPacket per instruction.
+  std::map<const MachineInstr*, unsigned> PacketInfo;
+
+  /// Return true if MI has an entry in PacketInfo with any bit of Flag set.
+  /// Uses a single lookup and never inserts into the map.
+  bool hasPacketFlag(const MachineInstr *MI, unsigned Flag) const {
+    std::map<const MachineInstr*, unsigned>::const_iterator It =
+      PacketInfo.find(MI);
+    return It != PacketInfo.end() && (It->second & Flag);
+  }
+
+public:
+  // VarArgsFrameIndex is zero-initialized so that getVarArgsFrameIndex()
+  // never reads an indeterminate value before setVarArgsFrameIndex() runs.
+  HexagonMachineFunctionInfo()
+    : SRetReturnReg(0), VarArgsFrameIndex(0), HasClobberLR(false) {}
+
+  // The MachineFunction argument is accepted but not used.
+  HexagonMachineFunctionInfo(MachineFunction &MF)
+    : SRetReturnReg(0), VarArgsFrameIndex(0), HasClobberLR(false) {}
+
+  unsigned getSRetReturnReg() const { return SRetReturnReg; }
+  void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
+  /// Append MI to the list returned by getAllocaAdjustInsts().
+  void addAllocaAdjustInst(MachineInstr* MI) {
+    AllocaAdjustInsts.push_back(MI);
+  }
+  const std::vector<MachineInstr*>& getAllocaAdjustInsts() const {
+    return AllocaAdjustInsts;
+  }
+
+  void setVarArgsFrameIndex(int v) { VarArgsFrameIndex = v; }
+  int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+
+  /// Mark MI as the first instruction of a packet.
+  void setStartPacket(MachineInstr* MI) {
+    PacketInfo[MI] |= Hexagon::StartPacket;
+  }
+  /// Mark MI as the last instruction of a packet.
+  void setEndPacket(MachineInstr* MI) {
+    PacketInfo[MI] |= Hexagon::EndPacket;
+  }
+  bool isStartPacket(const MachineInstr* MI) const {
+    return hasPacketFlag(MI, Hexagon::StartPacket);
+  }
+  bool isEndPacket(const MachineInstr* MI) const {
+    return hasPacketFlag(MI, Hexagon::EndPacket);
+  }
+
+  void setHasClobberLR(bool v) { HasClobberLR = v; }
+  bool hasClobberLR() const { return HasClobberLR; }
+
+};
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonOptimizeSZExtends.cpp b/lib/Target/Hexagon/HexagonOptimizeSZExtends.cpp
new file mode 100644
index 0000000..1229aca
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonOptimizeSZExtends.cpp
@@ -0,0 +1,129 @@
+//===-- HexagonOptimizeSZExtends.cpp - Identify and remove sign and -------===//
+//===-- zero extends. -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/PassSupport.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <algorithm>
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+  /// Machine-function pass that rewrites a COPY of the low sub-register of an
+  /// SXTW result so that it copies the SXTW source register directly.
+  struct HexagonOptimizeSZExtends : public MachineFunctionPass {
+    static char ID;
+    HexagonOptimizeSZExtends() : MachineFunctionPass(ID) {}
+
+    bool runOnMachineFunction(MachineFunction &MF);
+
+    // The name previously read "size extends"; the pass handles sign
+    // extends (SXTW).
+    const char *getPassName() const {
+      return "Hexagon remove redundant zero and sign extends";
+    }
+
+    void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<MachineFunctionAnalysis>();
+      AU.addPreserved<MachineFunctionAnalysis>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+  };
+}
+
+char HexagonOptimizeSZExtends::ID = 0;
+
+// This is a simple, block-local pass to get rid of redundant sign extends
+// for the following case:
+//
+// Transform the following pattern
+//   %vreg170<def> = SXTW %vreg166
+//   ...
+//   %vreg176<def> = COPY %vreg170:subreg_loreg
+//
+// Into
+//   %vreg176<def> = COPY %vreg166
+//
+// Returns true only if at least one COPY was rewritten.
+bool HexagonOptimizeSZExtends::runOnMachineFunction(MachineFunction &MF) {
+  // Maps the destination vreg of every SXTW seen so far in the current
+  // block to that SXTW's source vreg.
+  DenseMap<unsigned, unsigned> SExtMap;
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  bool Changed = false;
+
+  // Loop over all of the basic blocks
+  for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
+       MBBb != MBBe; ++MBBb) {
+    MachineBasicBlock* MBB = MBBb;
+    // Sign extends recorded in a previous block are not reused.
+    SExtMap.clear();
+
+    // Traverse the basic block.
+    for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
+         ++MII) {
+      MachineInstr *MI = MII;
+      // Look for sign extends:
+      //   %vreg170<def> = SXTW %vreg166
+      if (MI->getOpcode() == Hexagon::SXTW) {
+        assert (MI->getNumOperands() == 2);
+        MachineOperand &Dst = MI->getOperand(0);
+        MachineOperand &Src = MI->getOperand(1);
+        unsigned DstReg = Dst.getReg();
+        unsigned SrcReg = Src.getReg();
+        // Just handle virtual registers.
+        if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+            TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+          // Map the following:
+          //   %vreg170<def> = SXTW %vreg166
+          //   SExtMap[170] = vreg166
+          SExtMap[DstReg] = SrcReg;
+        }
+      }
+
+      // Look for copy:
+      //   %vreg176<def> = COPY %vreg170:subreg_loreg
+      if (!MI->isCopy())
+        continue;
+
+      assert (MI->getNumOperands() == 2);
+      MachineOperand &Dst = MI->getOperand(0);
+      MachineOperand &Src = MI->getOperand(1);
+
+      // Make sure we are copying the lower 32 bits.
+      if (Src.getSubReg() != Hexagon::subreg_loreg)
+        continue;
+
+      unsigned DstReg = Dst.getReg();
+      unsigned SrcReg = Src.getReg();
+      if (!TargetRegisterInfo::isVirtualRegister(DstReg) ||
+          !TargetRegisterInfo::isVirtualRegister(SrcReg))
+        continue;
+
+      // Try to find in the map. lookup() yields 0 when SrcReg was not
+      // defined by a recorded SXTW.
+      if (unsigned SextSrc = SExtMap.lookup(SrcReg)) {
+        // Change the 1st operand.
+        MI->RemoveOperand(1);
+        MI->addOperand(MachineOperand::CreateReg(SextSrc, false));
+        // SextSrc gains a new, later use here, so a <kill> marker on an
+        // earlier use (e.g. on the SXTW itself) would be stale.
+        MRI.clearKillFlags(SextSrc);
+        Changed = true;
+      }
+    }
+  }
+  return Changed;
+}
+
+/// Factory function: allocate a new HexagonOptimizeSZExtends pass and hand
+/// it back through the generic FunctionPass interface.
+FunctionPass *llvm::createHexagonOptimizeSZExtends() {
+  FunctionPass *Pass = new HexagonOptimizeSZExtends();
+  return Pass;
+}
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
new file mode 100644
index 0000000..521e0c1
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -0,0 +1,323 @@
+//==- HexagonRegisterInfo.cpp - Hexagon Register Information -----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <iostream>
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Function.h"
+using namespace llvm;
+
+
+/// Bind the register info to its subtarget and instruction info. R31 is the
+/// register handed to the TableGen-generated base class constructor.
+HexagonRegisterInfo::HexagonRegisterInfo(HexagonSubtarget &st,
+                                         const HexagonInstrInfo &tii)
+  : HexagonGenRegisterInfo(Hexagon::R31), Subtarget(st), TII(tii) {}
+
+/// Return the zero-terminated list of callee-saved registers for the
+/// subtarget's architecture version: R24-R27 on V2, R16-R27 on V3 and V4.
+/// The MF argument is not consulted.
+const unsigned* HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction
+                                                        *MF)
+  const {
+  static const unsigned CalleeSavedRegsV2[] = {
+    Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0
+  };
+  static const unsigned CalleeSavedRegsV3[] = {
+    Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19,
+    Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23,
+    Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0
+  };
+
+  switch(Subtarget.getHexagonArchVersion()) {
+  case HexagonSubtarget::V2:
+    return CalleeSavedRegsV2;
+  case HexagonSubtarget::V3:
+  case HexagonSubtarget::V4:
+    return CalleeSavedRegsV3;
+  default:
+    llvm_unreachable("Callee saved registers requested for unknown "
+                     "architecture version");
+  }
+}
+
+/// Build the set of registers that are marked reserved: the two
+/// HEXAGON_RESERVED_REG_* scratch registers, R29-R31, D14/D15, and the
+/// LC0/LC1/SA0/SA1 registers. The MF argument is not consulted.
+BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF)
+  const {
+  static const unsigned ReservedList[] = {
+    HEXAGON_RESERVED_REG_1, HEXAGON_RESERVED_REG_2,
+    Hexagon::R29, Hexagon::R30, Hexagon::R31,
+    Hexagon::D14, Hexagon::D15,
+    Hexagon::LC0, Hexagon::LC1,
+    Hexagon::SA0, Hexagon::SA1
+  };
+  const unsigned NumReserved = sizeof(ReservedList) / sizeof(ReservedList[0]);
+
+  BitVector Reserved(getNumRegs());
+  for (unsigned Idx = 0; Idx != NumReserved; ++Idx)
+    Reserved.set(ReservedList[Idx]);
+  return Reserved;
+}
+
+
+/// Return the register classes matching, entry for entry, the callee-saved
+/// register lists: four IntRegs entries on V2, twelve on V3 and V4. Unlike
+/// the register list, this array carries no terminating entry. The MF
+/// argument is not consulted.
+const TargetRegisterClass* const*
+HexagonRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+  static const TargetRegisterClass * const CalleeSavedRegClassesV2[] = {
+    &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+    &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+  };
+  static const TargetRegisterClass * const CalleeSavedRegClassesV3[] = {
+    &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+    &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+    &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+    &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+    &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+    &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+  };
+
+  switch(Subtarget.getHexagonArchVersion()) {
+  case HexagonSubtarget::V2:
+    return CalleeSavedRegClassesV2;
+  case HexagonSubtarget::V3:
+  case HexagonSubtarget::V4:
+    return CalleeSavedRegClassesV3;
+  default:
+    llvm_unreachable("Callee saved register classes requested for unknown "
+                     "architecture version");
+  }
+}
+
+/// Remove a call-frame setup/destroy pseudo instruction from MBB. No
+/// replacement code is emitted for ADJCALLSTACKDOWN / ADJCALLSTACKUP yet;
+/// any other opcode trips an assertion before being erased.
+void HexagonRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator I) const {
+  switch (I->getOpcode()) {
+  case Hexagon::ADJCALLSTACKDOWN:
+    // Hexagon_TODO: add code
+    break;
+  case Hexagon::ADJCALLSTACKUP:
+    // Hexagon_TODO: add code
+    break;
+  default:
+    assert(0 && "Cannot handle this call frame pseudo instruction");
+    break;
+  }
+  MBB.erase(I);
+}
+
+// Rewrite the frame-index operand of the instruction at II (and the operand
+// that follows it) into a base register plus immediate offset.
+//
+// A stack-pointer-relative form is used when the function has no
+// variable-sized objects, FrameSize+Offset is a valid offset for the opcode
+// and MI is not a predicate-register spill op. Otherwise the frame register
+// is used, and when Offset is not valid for the opcode the address is first
+// materialized into a register with extra instructions inserted before II.
+void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+
+ //
+ // Hexagon_TODO: Do we need to enforce this for Hexagon?
+ assert(SPAdj == 0 && "Unexpected");
+
+
+ // Locate the frame-index operand; operand i+1 is rewritten below as the
+ // immediate offset that goes with it.
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+ // Addressable stack objects are accessed using neg. offsets from %fp.
+ MachineFunction &MF = *MI.getParent()->getParent();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ unsigned FrameReg = getFrameRegister(MF);
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ if (!TFI->hasFP(MF)) {
+ // We will not reserve space on the stack for the lr and fp registers.
+ Offset -= 2 * Hexagon_WordSize;
+ }
+
+ // NOTE(review): FrameSize is unsigned and Offset is int, so FrameSize+Offset
+ // below is evaluated in unsigned arithmetic - confirm this is intended for
+ // negative offsets.
+ const unsigned FrameSize = MFI.getStackSize();
+
+ if (!MFI.hasVarSizedObjects() &&
+ TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset)) &&
+ !TII.isSpillPredRegOp(&MI)) {
+ // Replace frame index with a stack pointer reference.
+ MI.getOperand(i).ChangeToRegister(getStackRegister(), false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(FrameSize+Offset);
+ } else {
+ // Replace frame index with a frame pointer reference.
+ if (!TII.isValidOffset(MI.getOpcode(), Offset)) {
+
+ // If the offset overflows, then correct it.
+ //
+ // For loads, we do not need a reserved register
+ // r0 = memw(r30 + #10000) to:
+ //
+ // r0 = add(r30, #10000)
+ // r0 = memw(r0)
+ if ( (MI.getOpcode() == Hexagon::LDriw) ||
+ (MI.getOpcode() == Hexagon::LDrid) ||
+ (MI.getOpcode() == Hexagon::LDrih) ||
+ (MI.getOpcode() == Hexagon::LDriuh) ||
+ (MI.getOpcode() == Hexagon::LDrib) ||
+ (MI.getOpcode() == Hexagon::LDriub) ) {
+ // The load's own destination doubles as the address scratch register;
+ // for LDrid the first sub-register of the destination is used.
+ unsigned dstReg = (MI.getOpcode() == Hexagon::LDrid) ?
+ *getSubRegisters(MI.getOperand(0).getReg()) :
+ MI.getOperand(0).getReg();
+
+ // Check if offset can fit in addi.
+ if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset);
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_rr),
+ dstReg).addReg(FrameReg).addReg(dstReg);
+ } else {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_ri),
+ dstReg).addReg(FrameReg).addImm(Offset);
+ }
+
+ MI.getOperand(i).ChangeToRegister(dstReg, false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(0);
+ } else if ((MI.getOpcode() == Hexagon::STriw) ||
+ (MI.getOpcode() == Hexagon::STrid) ||
+ (MI.getOpcode() == Hexagon::STrih) ||
+ (MI.getOpcode() == Hexagon::STrib) ||
+ (MI.getOpcode() == Hexagon::STriwt)) {
+ // For stores, we need a reserved register. Change
+ // memw(r30 + #10000) = r0 to:
+ //
+ // rs = add(r30, #10000);
+ // memw(rs) = r0
+ unsigned resReg = HEXAGON_RESERVED_REG_1;
+
+ // Check if offset can fit in addi.
+ if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset);
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_rr),
+ resReg).addReg(FrameReg).addReg(resReg);
+ } else {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_ri),
+ resReg).addReg(FrameReg).addImm(Offset);
+ }
+ MI.getOperand(i).ChangeToRegister(resReg, false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(0);
+ } else if (TII.isMemOp(&MI)) {
+ // Mem-ops: retry the stack-pointer form first (this time without the
+ // isSpillPredRegOp restriction), then fall back to building the
+ // address in the reserved register.
+ unsigned resReg = HEXAGON_RESERVED_REG_1;
+ if (!MFI.hasVarSizedObjects() &&
+ TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) {
+ MI.getOperand(i).ChangeToRegister(getStackRegister(), false, false,
+ true);
+ MI.getOperand(i+1).ChangeToImmediate(FrameSize+Offset);
+ } else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset);
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_rr),
+ resReg).addReg(FrameReg).addReg(resReg);
+ MI.getOperand(i).ChangeToRegister(resReg, false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(0);
+ } else {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_ri),
+ resReg).addReg(FrameReg).addImm(Offset);
+ MI.getOperand(i).ChangeToRegister(resReg, false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(0);
+ }
+ } else {
+ // Any other opcode: operand 0 is taken as the register that receives
+ // FrameReg + Offset, always built via CONST32 + ADD_rr.
+ // NOTE(review): assumes operand 0 is a register - confirm for every
+ // opcode that can reach this branch.
+ unsigned dstReg = MI.getOperand(0).getReg();
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset);
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_rr),
+ dstReg).addReg(FrameReg).addReg(dstReg);
+ // Can we delete MI??? r2 = add (r2, #0).
+ MI.getOperand(i).ChangeToRegister(dstReg, false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(0);
+ }
+ } else {
+ // If the offset is small enough to fit in the immediate field, directly
+ // encode it.
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ }
+ }
+
+}
+
+/// Return the register reported as the return-address register (R31).
+unsigned HexagonRegisterInfo::getRARegister() const {
+  const unsigned ReturnAddrReg = Hexagon::R31;
+  return ReturnAddrReg;
+}
+
+/// Return the base register for frame accesses in MF: R30 when the function
+/// has a frame pointer, R29 otherwise.
+unsigned HexagonRegisterInfo::getFrameRegister(const MachineFunction
+                                               &MF) const {
+  const TargetFrameLowering *FrameLowering = MF.getTarget().getFrameLowering();
+  return FrameLowering->hasFP(MF) ? Hexagon::R30 : Hexagon::R29;
+}
+
+/// Function-independent overload: always reports R30 as the frame register.
+unsigned HexagonRegisterInfo::getFrameRegister() const {
+  const unsigned FramePtrReg = Hexagon::R30;
+  return FramePtrReg;
+}
+
+/// Return the register used as the stack pointer (R29).
+unsigned HexagonRegisterInfo::getStackRegister() const {
+  const unsigned StackPtrReg = Hexagon::R29;
+  return StackPtrReg;
+}
+
+/// Append the initial frame rule to Moves: VirtualFP = (R30 + #0), recorded
+/// with a null label.
+void HexagonRegisterInfo::getInitialFrameState(std::vector<MachineMove>
+                                               &Moves) const
+{
+  const MachineLocation VirtualFrame(MachineLocation::VirtualFP);
+  const MachineLocation FramePointer(getFrameRegister(), 0);
+  Moves.push_back(MachineMove(0, VirtualFrame, FramePointer));
+}
+
+/// Not implemented for Hexagon: asserts when assertions are enabled and
+/// otherwise returns 0.
+unsigned HexagonRegisterInfo::getEHExceptionRegister() const {
+  const unsigned NoRegister = 0;
+  assert(0 && "What is the exception register");
+  return NoRegister;
+}
+
+/// Not implemented for Hexagon: asserts when assertions are enabled and
+/// otherwise returns 0.
+unsigned HexagonRegisterInfo::getEHHandlerRegister() const {
+  const unsigned NoRegister = 0;
+  assert(0 && "What is the exception handler register");
+  return NoRegister;
+}
+
+#define GET_REGINFO_TARGET_DESC
+#include "HexagonGenRegisterInfo.inc"
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
new file mode 100644
index 0000000..33b0c14
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -0,0 +1,89 @@
+//==- HexagonRegisterInfo.h - Hexagon Register Information Impl --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonREGISTERINFO_H
+#define HexagonREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#define GET_REGINFO_HEADER
+#include "HexagonGenRegisterInfo.inc"
+#include "llvm/MC/MachineLocation.h"
+
+//
+// We try not to hard code the reserved registers in our code,
+// so the following two macros were defined. However, there
+// are still a few places that R11 and R10 are hard wired.
+// See below. If, in the future, we decide to change the reserved
+// registers, don't forget to update the following places:
+//
+// 1. the "Defs" set of STriw_pred in HexagonInstrInfo.td
+// 2. the "Defs" set of LDri_pred in HexagonInstrInfo.td
+// 3. the definition of "IntRegs" in HexagonRegisterInfo.td
+// 4. the definition of "DoubleRegs" in HexagonRegisterInfo.td
+//
+#define HEXAGON_RESERVED_REG_1 Hexagon::R10
+#define HEXAGON_RESERVED_REG_2 Hexagon::R11
+
+namespace llvm {
+
+class HexagonSubtarget;
+class HexagonInstrInfo;
+class Type;
+
+/// HexagonRegisterInfo - Hexagon implementation of the register information
+/// interface, layered on top of the tblgen-generated HexagonGenRegisterInfo.
+struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
+ // Subtarget and instruction info supplied to the constructor; both are held
+ // by reference, so they must outlive this object.
+ HexagonSubtarget &Subtarget;
+ const HexagonInstrInfo &TII;
+
+ HexagonRegisterInfo(HexagonSubtarget &st, const HexagonInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ const TargetRegisterClass* const* getCalleeSavedRegClasses(
+ const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ /// determineFrameLayout - Determine the size of the frame and maximum call
+ /// frame size.
+ void determineFrameLayout(MachineFunction &MF) const;
+
+ /// requiresRegisterScavenging - returns true since we may need scavenging for
+ /// a temporary register when generating hardware loop instructions.
+ bool requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ // Debug information queries.
+ // getRARegister() reports R31 and getStackRegister() reports R29.
+ // getFrameRegister(MF) reports R30 when MF has a frame pointer and R29
+ // otherwise; the argument-less overload always reports R30.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+ unsigned getFrameRegister() const;
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+ unsigned getStackRegister() const;
+
+ // Exception handling queries.
+ // Neither register is defined yet: both implementations assert.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td
new file mode 100644
index 0000000..c05f844
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -0,0 +1,169 @@
+//===- HexagonRegisterInfo.td - Hexagon Register defs ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the Hexagon register file.
+//===----------------------------------------------------------------------===//
+
+class HexagonReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "Hexagon";
+}
+
+class HexagonDoubleReg<string n, list<Register> subregs> :
+ RegisterWithSubRegs<n, subregs> {
+ field bits<5> Num;
+ let Namespace = "Hexagon";
+}
+
+// Registers are identified with 5-bit ID numbers.
+// Ri - 32-bit integer registers.
+class Ri<bits<5> num, string n> : HexagonReg<n> {
+ let Num = num;
+}
+
+// Rf - 32-bit floating-point registers.
+class Rf<bits<5> num, string n> : HexagonReg<n> {
+ let Num = num;
+}
+
+
+// Rd - 64 bit registers.
+class Rd<bits<5> num, string n, list<Register> subregs> :
+HexagonDoubleReg<n, subregs> {
+ let Num = num;
+ let SubRegs = subregs;
+}
+
+
+// Rp - register class helper used below for the predicate registers (P0-P3).
+class Rp<bits<5> num, string n> : HexagonReg<n> {
+ let Num = num;
+}
+
+// Rc - register class helper used below for the control registers
+// (SA0/LC0/SA1/LC1).
+class Rc<bits<5> num, string n> : HexagonReg<n> {
+ let Num = num;
+}
+
+let Namespace = "Hexagon" in {
+
+ def subreg_loreg : SubRegIndex;
+ def subreg_hireg : SubRegIndex;
+
+ // Integer registers.
+ def R0 : Ri< 0, "r0">, DwarfRegNum<[0]>;
+ def R1 : Ri< 1, "r1">, DwarfRegNum<[1]>;
+ def R2 : Ri< 2, "r2">, DwarfRegNum<[2]>;
+ def R3 : Ri< 3, "r3">, DwarfRegNum<[3]>;
+ def R4 : Ri< 4, "r4">, DwarfRegNum<[4]>;
+ def R5 : Ri< 5, "r5">, DwarfRegNum<[5]>;
+ def R6 : Ri< 6, "r6">, DwarfRegNum<[6]>;
+ def R7 : Ri< 7, "r7">, DwarfRegNum<[7]>;
+ def R8 : Ri< 8, "r8">, DwarfRegNum<[8]>;
+ def R9 : Ri< 9, "r9">, DwarfRegNum<[9]>;
+ def R10 : Ri<10, "r10">, DwarfRegNum<[10]>;
+ def R11 : Ri<11, "r11">, DwarfRegNum<[11]>;
+ def R12 : Ri<12, "r12">, DwarfRegNum<[12]>;
+ def R13 : Ri<13, "r13">, DwarfRegNum<[13]>;
+ def R14 : Ri<14, "r14">, DwarfRegNum<[14]>;
+ def R15 : Ri<15, "r15">, DwarfRegNum<[15]>;
+ def R16 : Ri<16, "r16">, DwarfRegNum<[16]>;
+ def R17 : Ri<17, "r17">, DwarfRegNum<[17]>;
+ def R18 : Ri<18, "r18">, DwarfRegNum<[18]>;
+ def R19 : Ri<19, "r19">, DwarfRegNum<[19]>;
+ def R20 : Ri<20, "r20">, DwarfRegNum<[20]>;
+ def R21 : Ri<21, "r21">, DwarfRegNum<[21]>;
+ def R22 : Ri<22, "r22">, DwarfRegNum<[22]>;
+ def R23 : Ri<23, "r23">, DwarfRegNum<[23]>;
+ def R24 : Ri<24, "r24">, DwarfRegNum<[24]>;
+ def R25 : Ri<25, "r25">, DwarfRegNum<[25]>;
+ def R26 : Ri<26, "r26">, DwarfRegNum<[26]>;
+ def R27 : Ri<27, "r27">, DwarfRegNum<[27]>;
+ def R28 : Ri<28, "r28">, DwarfRegNum<[28]>;
+ def R29 : Ri<29, "r29">, DwarfRegNum<[29]>;
+ def R30 : Ri<30, "r30">, DwarfRegNum<[30]>;
+ def R31 : Ri<31, "r31">, DwarfRegNum<[31]>;
+
+
+ // NOTE(review): PC and GP reuse R31's register number and its "r31" name,
+ // and PC's DWARF number 32 is also assigned to D0 -- these look like
+ // placeholders; confirm the intended numbers and names.
+ def PC : Ri<31, "r31">, DwarfRegNum<[32]>;
+ def GP : Ri<31, "r31">, DwarfRegNum<[33]>;
+
+ // Aliases of the R* registers used to hold 64-bit int values (doubles).
+ let SubRegIndices = [subreg_loreg, subreg_hireg] in {
+ def D0 : Rd< 0, "r1:0", [R0, R1]>, DwarfRegNum<[32]>;
+ def D1 : Rd< 2, "r3:2", [R2, R3]>, DwarfRegNum<[34]>;
+ def D2 : Rd< 4, "r5:4", [R4, R5]>, DwarfRegNum<[36]>;
+ def D3 : Rd< 6, "r7:6", [R6, R7]>, DwarfRegNum<[38]>;
+ def D4 : Rd< 8, "r9:8", [R8, R9]>, DwarfRegNum<[40]>;
+ def D5 : Rd<10, "r11:10", [R10, R11]>, DwarfRegNum<[42]>;
+ def D6 : Rd<12, "r13:12", [R12, R13]>, DwarfRegNum<[44]>;
+ def D7 : Rd<14, "r15:14", [R14, R15]>, DwarfRegNum<[46]>;
+ def D8 : Rd<16, "r17:16", [R16, R17]>, DwarfRegNum<[48]>;
+ def D9 : Rd<18, "r19:18", [R18, R19]>, DwarfRegNum<[50]>;
+ def D10 : Rd<20, "r21:20", [R20, R21]>, DwarfRegNum<[52]>;
+ def D11 : Rd<22, "r23:22", [R22, R23]>, DwarfRegNum<[54]>;
+ def D12 : Rd<24, "r25:24", [R24, R25]>, DwarfRegNum<[56]>;
+ def D13 : Rd<26, "r27:26", [R26, R27]>, DwarfRegNum<[58]>;
+ def D14 : Rd<28, "r29:28", [R28, R29]>, DwarfRegNum<[60]>;
+ def D15 : Rd<30, "r31:30", [R30, R31]>, DwarfRegNum<[62]>;
+ }
+
+  // Predicate registers. Each register gets its own number (0-3) so that
+  // p0-p3 stay distinguishable through the Num field; previously all four
+  // were declared with number 0.
+  def P0 : Rp< 0, "p0">, DwarfRegNum<[63]>;
+  def P1 : Rp< 1, "p1">, DwarfRegNum<[64]>;
+  def P2 : Rp< 2, "p2">, DwarfRegNum<[65]>;
+  def P3 : Rp< 3, "p3">, DwarfRegNum<[66]>;
+
+  // Control registers. Numbered by their position in the control register
+  // file (C0 = SA0, C1 = LC0, C2 = SA1, C3 = LC1) instead of all sharing
+  // number 0.
+  def SA0 : Rc<0, "sa0">, DwarfRegNum<[67]>;
+  def LC0 : Rc<1, "lc0">, DwarfRegNum<[68]>;
+
+  def SA1 : Rc<2, "sa1">, DwarfRegNum<[69]>;
+  def LC1 : Rc<3, "lc1">, DwarfRegNum<[70]>;
+}
+
+
+
+
+
+
+
+
+
+
+// Register classes.
+//
+// FIXME: the register order should be defined in terms of the preferred
+// allocation order...
+//
+// IntRegs - the 32-bit integer registers. R0-R9 and R12-R28 are listed
+// first; R10, R11, R29, R30 and R31 are placed at the end of the order.
+def IntRegs : RegisterClass<"Hexagon", [i32], 32, (add (sequence "R%u", 0, 9),
+ (sequence "R%u", 12, 28),
+ R10, R11, R29, R30,
+ R31)> {
+}
+
+
+
+// DoubleRegs - the 64-bit register pairs. D5 (r11:10), D14 (r29:28) and
+// D15 (r31:30) are placed at the end of the order, mirroring the placement
+// of their 32-bit halves in IntRegs.
+def DoubleRegs : RegisterClass<"Hexagon", [i64], 64, (add (sequence "D%u", 0,
+ 4),
+ (sequence "D%u", 6, 13),
+ D5, D14, D15)> {
+ let SubRegClasses = [(IntRegs subreg_loreg, subreg_hireg)];
+}
+
+
+def PredRegs : RegisterClass<"Hexagon", [i1], 32, (add (sequence "P%u", 0, 3))>
+{
+ let Size = 32;
+}
+
+def CRRegs : RegisterClass<"Hexagon", [i32], 32, (add (sequence "LC%u", 0, 1),
+ (sequence "SA%u", 0, 1),
+ PC)> {
+ let Size = 32;
+}
diff --git a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
new file mode 100644
index 0000000..3ca257f
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
@@ -0,0 +1,85 @@
+//===- HexagonRemoveSZExtArgs.cpp - Remove unnecessary arg sign extends ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Pass that removes sign extends for function parameters. These parameters
+// are already sign extended by the caller per Hexagon's ABI
+//
+//===----------------------------------------------------------------------===//
+
+
+
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "HexagonTargetMachine.h"
+#include <iostream>
+
+using namespace llvm;
+namespace {
+ /// HexagonRemoveExtendArgs - Function pass that rewrites sign-extends of
+ /// SExt-attributed function arguments (see runOnFunction).
+ struct HexagonRemoveExtendArgs : public FunctionPass {
+ public:
+ static char ID;
+ HexagonRemoveExtendArgs() : FunctionPass(ID) {}
+ virtual bool runOnFunction(Function &F);
+
+ const char *getPassName() const {
+ return "Remove sign extends";
+ }
+
+ // Requires MachineFunctionAnalysis and declares it preserved.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addPreserved<MachineFunctionAnalysis>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char HexagonRemoveExtendArgs::ID = 0;
+RegisterPass<HexagonRemoveExtendArgs> X("reargs",
+ "Remove Sign and Zero Extends for Args"
+ );
+
+
+
+/// runOnFunction - For every non-pointer argument of F that carries the SExt
+/// attribute, replace each sign-extend instruction using that argument with
+/// a fresh sign-extend inserted at the start of the entry block.
+///
+/// Returns true only when at least one instruction was rewritten, so the
+/// pass manager is not told that an untouched function changed.
+bool HexagonRemoveExtendArgs::runOnFunction(Function &F) {
+  bool Changed = false;
+  // Idx is the attribute index of the argument AI points at; it starts at 1
+  // for the first argument and advances in lock step with AI.
+  unsigned Idx = 1;
+  for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE;
+       ++AI, ++Idx) {
+    if (!F.paramHasAttr(Idx, Attribute::SExt))
+      continue;
+
+    Argument *Arg = AI;
+    if (isa<PointerType>(Arg->getType()))
+      continue;
+
+    for (Instruction::use_iterator UI = Arg->use_begin();
+         UI != Arg->use_end();) {
+      if (!isa<SExtInst>(*UI)) {
+        ++UI;
+        continue;
+      }
+
+      Instruction *Use = cast<Instruction>(*UI);
+      SExtInst *SI = new SExtInst(Arg, Use->getType());
+      assert(EVT::getEVT(SI->getType()) == (EVT::getEVT(Use->getType())));
+      // Step past Use before it is erased so the iterator stays valid.
+      ++UI;
+      Use->replaceAllUsesWith(SI);
+      Instruction *First = F.getEntryBlock().begin();
+      SI->insertBefore(First);
+      Use->eraseFromParent();
+      Changed = true;
+    }
+  }
+  return Changed;
+}
+
+
+
+/// createHexagonRemoveExtendOps - Factory for the HexagonRemoveExtendArgs
+/// pass. The target machine argument is accepted but not used.
+FunctionPass *llvm::createHexagonRemoveExtendOps(HexagonTargetMachine &TM) {
+  FunctionPass *Pass = new HexagonRemoveExtendArgs();
+  return Pass;
+}
diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td
new file mode 100644
index 0000000..427d1cb
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSchedule.td
@@ -0,0 +1,53 @@
+//===- HexagonSchedule.td - Hexagon Scheduling Definitions -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Functional Units
+def LUNIT : FuncUnit;
+def LSUNIT : FuncUnit;
+def MUNIT : FuncUnit;
+def SUNIT : FuncUnit;
+
+
+// Itinerary classes
+def ALU32 : InstrItinClass;
+def ALU64 : InstrItinClass;
+def CR : InstrItinClass;
+def J : InstrItinClass;
+def JR : InstrItinClass;
+def LD : InstrItinClass;
+def M : InstrItinClass;
+def ST : InstrItinClass;
+def S : InstrItinClass;
+def PSEUDO : InstrItinClass;
+
+
+def HexagonItineraries :
+ ProcessorItineraries<[LUNIT, LSUNIT, MUNIT, SUNIT], [], [
+ InstrItinData<ALU32 , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>,
+ InstrItinData<ALU64 , [InstrStage<1, [MUNIT, SUNIT]>]>,
+ InstrItinData<CR , [InstrStage<1, [SUNIT]>]>,
+ InstrItinData<J , [InstrStage<1, [SUNIT, MUNIT]>]>,
+ InstrItinData<JR , [InstrStage<1, [MUNIT]>]>,
+ InstrItinData<LD , [InstrStage<1, [LUNIT, LSUNIT]>]>,
+ InstrItinData<M , [InstrStage<1, [MUNIT, SUNIT]>]>,
+ InstrItinData<ST , [InstrStage<1, [LSUNIT]>]>,
+ InstrItinData<S , [InstrStage<1, [SUNIT, MUNIT]>]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// V4 Machine Info +
+//===----------------------------------------------------------------------===//
+
+include "HexagonScheduleV4.td"
+
+//===----------------------------------------------------------------------===//
+// V4 Machine Info -
+//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonScheduleV4.td b/lib/Target/Hexagon/HexagonScheduleV4.td
new file mode 100644
index 0000000..4cf66fe
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonScheduleV4.td
@@ -0,0 +1,56 @@
+//=-HexagonScheduleV4.td - HexagonV4 Scheduling Definitions --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// There are four SLOTS (four parallel pipelines) in Hexagon V4 machine.
+// This file describes that machine information.
+
+//
+// |===========|==================================================|
+// | PIPELINE | Instruction Classes |
+// |===========|==================================================|
+// | SLOT0 | LD ST ALU32 MEMOP NV SYSTEM |
+// |-----------|--------------------------------------------------|
+// | SLOT1 | LD ST ALU32 |
+// |-----------|--------------------------------------------------|
+// | SLOT2 | XTYPE ALU32 J JR |
+// |-----------|--------------------------------------------------|
+// | SLOT3 | XTYPE ALU32 J CR |
+// |===========|==================================================|
+
+
+// Functional Units.
+def SLOT0 : FuncUnit;
+def SLOT1 : FuncUnit;
+def SLOT2 : FuncUnit;
+def SLOT3 : FuncUnit;
+
+// Itinerary classes.
+def NV_V4 : InstrItinClass;
+def MEM_V4 : InstrItinClass;
+// ALU64/M/S Instruction classes of V2 are collectively known as XTYPE in V4.
+
+// Itinerary table for Hexagon V4. Every itinerary class is given a single
+// stage restricted to the slots listed for it; the V2 classes ALU64, M and S
+// (XTYPE in V4) are all mapped to SLOT2/SLOT3.
+def HexagonItinerariesV4 : ProcessorItineraries<
+ [SLOT0, SLOT1, SLOT2, SLOT3], [], [
+ InstrItinData<LD , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<ST , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<ALU32 , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<NV_V4 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<MEM_V4 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<J , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<JR , [InstrStage<1, [SLOT2]>]>,
+ InstrItinData<CR , [InstrStage<1, [SLOT3]>]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<M , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S , [InstrStage<1, [SLOT2, SLOT3]>]>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Hexagon V4 Resource Definitions -
+//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonSelectCCInfo.td b/lib/Target/Hexagon/HexagonSelectCCInfo.td
new file mode 100644
index 0000000..f21d928
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSelectCCInfo.td
@@ -0,0 +1,121 @@
+//=-HexagonSelectCCInfo.td - Selectcc mappings ---------------*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+//
+// selectcc mappings.
+//
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETEQ)),
+ (i32 (MUX_rr (i1 (CMPEQrr IntRegs:$lhs, IntRegs:$rhs)),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETNE)),
+ (i32 (MUX_rr (i1 (NOT_Ps (CMPEQrr IntRegs:$lhs, IntRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETGT)),
+ (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$lhs, IntRegs:$rhs)),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETUGT)),
+ (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs)),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+
+
+// setult: lhs <u rhs is the same as rhs >u lhs, so compare with the operands
+// swapped. Testing !(lhs >u rhs-1) instead selects the wrong value when
+// rhs == 0, because rhs-1 wraps around to the largest unsigned value.
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETULT)),
+      (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$rhs, IntRegs:$lhs)),
+                   IntRegs:$tval, IntRegs:$fval))>;
+
+// setlt: lhs < rhs is the same as rhs > lhs, so compare with the operands
+// swapped. Testing !(lhs > rhs-1) instead selects the wrong value when rhs
+// is the most negative integer, because rhs-1 wraps around.
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETLT)),
+      (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$rhs, IntRegs:$lhs)),
+                   IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETLE)),
+ (i32 (MUX_rr (i1 (NOT_Ps (CMPGTrr IntRegs:$lhs, IntRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETULE)),
+ (i32 (MUX_rr (i1 (NOT_Ps (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+
+//
+// selectcc mappings for greater-or-equal: lhs >= rhs is !(rhs > lhs).
+// Rewriting it as lhs > rhs-1 is incorrect whenever rhs-1 wraps around
+// (rhs == most negative integer for setge, rhs == 0 for setuge).
+//
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETGE)),
+      (i32 (MUX_rr (i1 (NOT_Ps (CMPGTrr IntRegs:$rhs, IntRegs:$lhs))),
+                   IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETUGE)),
+      (i32 (MUX_rr (i1 (NOT_Ps (CMPGTUrr IntRegs:$rhs, IntRegs:$lhs))),
+                   IntRegs:$tval, IntRegs:$fval))>;
+
+
+
+//
+// selectcc mappings for predicate comparisons.
+//
+// Convert Rd = selectcc(p0, p1, true_val, false_val, SETEQ) into:
+// pt = not(p0 xor p1)
+// Rd = mux(pt, true_val, false_val)
+// and similarly for SETNE
+//
+def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETNE)),
+ (i32 (MUX_rr (i1 (XOR_pp PredRegs:$lhs, PredRegs:$rhs)), IntRegs:$tval,
+ IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETEQ)),
+ (i32 (MUX_rr (i1 (NOT_pp (XOR_pp PredRegs:$lhs, PredRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+
+//
+// selectcc mappings for 64-bit operands are messy. Hexagon does not have a
+// MUX64, so use this:
+// selectcc(Rss, Rdd, tval, fval, cond) ->
+// combine(mux(cmp_cond(Rss, Rdd), tval.hi, fval.hi),
+// mux(cmp_cond(Rss, Rdd), tval.lo, fval.lo))
+
+// setgt-64.
+def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval,
+ DoubleRegs:$fval, SETGT)),
+ (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs),
+ (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)),
+ (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs),
+ (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>;
+
+
+// setlt-64 -> setgt-64 with the operands swapped: lhs < rhs is rhs > lhs.
+// (Comparing lhs > rhs-1 would actually test lhs >= rhs, and rhs-1 can wrap.)
+def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval,
+                         DoubleRegs:$fval, SETLT)),
+      (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$rhs, DoubleRegs:$lhs),
+                          (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg),
+                          (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)),
+                  (MUX_rr (CMPGT64rr DoubleRegs:$rhs, DoubleRegs:$lhs),
+                          (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg),
+                          (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>;
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
new file mode 100644
index 0000000..a52c604
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
@@ -0,0 +1,46 @@
+//===-- HexagonSelectionDAGInfo.cpp - Hexagon SelectionDAG Info -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the HexagonSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-selectiondag-info"
+#include "HexagonTargetMachine.h"
+using namespace llvm;
+
+bool llvm::flag_aligned_memcpy;
+
+/// Constructor - Forwards the target machine to the TargetSelectionDAGInfo
+/// base class; there is no Hexagon-specific state to set up.
+HexagonSelectionDAGInfo::HexagonSelectionDAGInfo(
+    const HexagonTargetMachine &TM)
+  : TargetSelectionDAGInfo(TM) {}
+
+/// Destructor - Nothing to release.
+HexagonSelectionDAGInfo::~HexagonSelectionDAGInfo() {}
+
+/// EmitTargetCodeForMemcpy - Emits no target-specific code (always returns
+/// an empty SDValue); it only records in llvm::flag_aligned_memcpy whether
+/// this memcpy has an alignment that is a multiple of 4 together with a
+/// constant size that is larger than 32 and a multiple of 8.
+SDValue
+HexagonSelectionDAGInfo::
+EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain,
+                        SDValue Dst, SDValue Src, SDValue Size, unsigned Align,
+                        bool isVolatile, bool AlwaysInline,
+                        MachinePointerInfo DstPtrInfo,
+                        MachinePointerInfo SrcPtrInfo) const {
+  bool IsAlignedCopy = false;
+  const bool WordAligned = (Align & 0x3) == 0;
+  if (WordAligned) {
+    // Only a compile-time constant size can qualify.
+    if (ConstantSDNode *KnownSize = dyn_cast<ConstantSDNode>(Size)) {
+      const uint64_t Bytes = KnownSize->getZExtValue();
+      IsAlignedCopy = (Bytes > 32) && ((Bytes % 8) == 0);
+    }
+  }
+  flag_aligned_memcpy = IsAlignedCopy;
+
+  return SDValue();
+}
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
new file mode 100644
index 0000000..86fa026
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
@@ -0,0 +1,40 @@
+//=-- HexagonSelectionDAGInfo.h - Hexagon SelectionDAG Info ------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Hexagon subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonSELECTIONDAGINFO_H
+#define HexagonSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class HexagonTargetMachine;
+
+/// HexagonSelectionDAGInfo - Hexagon subclass of TargetSelectionDAGInfo. It
+/// overrides only the memcpy lowering hook.
+class HexagonSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit HexagonSelectionDAGInfo(const HexagonTargetMachine &TM);
+ ~HexagonSelectionDAGInfo();
+
+ /// EmitTargetCodeForMemcpy - Target hook invoked when a memcpy is lowered.
+ virtual
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
new file mode 100644
index 0000000..f4d3647
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
@@ -0,0 +1,136 @@
+//===---- HexagonSplitTFRCondSets.cpp - split TFR condsets into xfers -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass tries to provide opportunities for better optimization of muxes.
+// The default code generated for something like: flag = (a == b) ? 1 : 3;
+// would be:
+//
+// {p0 = cmp.eq(r0,r1)}
+// {r3 = mux(p0,#1,#3)}
+//
+// This requires two packets. If we use .new predicated immediate transfers,
+// then we can do this in a single packet, e.g.:
+//
+// {p0 = cmp.eq(r0,r1)
+// if (p0.new) r3 = #1
+// if (!p0.new) r3 = #3}
+//
+// Note that the conditional assignments are not generated in .new form here.
+// We assume optimistically that they will be formed later.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "xfer"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
+#include "HexagonMachineFunctionInfo.h"
+#include <map>
+#include <iostream>
+
+#include "llvm/Support/CommandLine.h"
+#define DEBUG_TYPE "xfer"
+
+
+using namespace llvm;
+
+namespace {
+
+/// HexagonSplitTFRCondSets - Machine function pass that replaces
+/// TFR_condset_rr / TFR_condset_ii instructions with pairs of predicated
+/// transfers (see runOnMachineFunction).
+class HexagonSplitTFRCondSets : public MachineFunctionPass {
+ // Target machine the pass was created for, and its subtarget.
+ HexagonTargetMachine& QTM;
+ const HexagonSubtarget &QST;
+
+ public:
+ static char ID;
+ HexagonSplitTFRCondSets(HexagonTargetMachine& TM) :
+ MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {}
+
+ const char *getPassName() const {
+ return "Hexagon Split TFRCondSets";
+ }
+ bool runOnMachineFunction(MachineFunction &Fn);
+};
+
+
+char HexagonSplitTFRCondSets::ID = 0;
+
+
+/// runOnMachineFunction - Replace every conditional-set pseudo in Fn with
+/// predicated transfers:
+///   TFR_condset_rr d, p, s1, s2  ->  TFR_cPt d, p, s1 ; TFR_cNotPt d, p, s2
+///   TFR_condset_ii d, p, i1, i2  ->  TFRI_cPt d, p, i1 ; TFRI_cNotPt d, p, i2
+/// For the register form, a transfer whose source already equals the
+/// destination is not emitted.
+///
+/// Returns true if at least one instruction was replaced.
+bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) {
+
+  const TargetInstrInfo *TII = QTM.getInstrInfo();
+  bool Changed = false;
+
+  // Loop over all of the basic blocks.
+  for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
+       MBBb != MBBe; ++MBBb) {
+    MachineBasicBlock *MBB = MBBb;
+    // Traverse the basic block. The iterator is advanced before the current
+    // instruction is touched, so erasing it never requires stepping an
+    // iterator backwards (which would walk off the front of the block when
+    // the erased instruction is first and nothing was inserted before it).
+    MachineBasicBlock::iterator MII = MBB->begin();
+    while (MII != MBB->end()) {
+      MachineBasicBlock::iterator Cur = MII++;
+      MachineInstr *MI = Cur;
+      const int Opc = MI->getOpcode();
+
+      if (Opc == Hexagon::TFR_condset_rr) {
+        const unsigned DestReg = MI->getOperand(0).getReg();
+        const unsigned PredReg = MI->getOperand(1).getReg();
+        const unsigned SrcReg1 = MI->getOperand(2).getReg();
+        const unsigned SrcReg2 = MI->getOperand(3).getReg();
+
+        // Minor optimization: do not emit the predicated copy if the source
+        // and the destination are the same register.
+        if (DestReg != SrcReg1) {
+          BuildMI(*MBB, Cur, MI->getDebugLoc(), TII->get(Hexagon::TFR_cPt),
+                  DestReg).addReg(PredReg).addReg(SrcReg1);
+        }
+        if (DestReg != SrcReg2) {
+          BuildMI(*MBB, Cur, MI->getDebugLoc(), TII->get(Hexagon::TFR_cNotPt),
+                  DestReg).addReg(PredReg).addReg(SrcReg2);
+        }
+        MBB->erase(Cur);
+        Changed = true;
+      } else if (Opc == Hexagon::TFR_condset_ii) {
+        const unsigned DestReg = MI->getOperand(0).getReg();
+        const unsigned PredReg = MI->getOperand(1).getReg();
+        const int Immed1 = MI->getOperand(2).getImm();
+        const int Immed2 = MI->getOperand(3).getImm();
+
+        BuildMI(*MBB, Cur, MI->getDebugLoc(), TII->get(Hexagon::TFRI_cPt),
+                DestReg).addReg(PredReg).addImm(Immed1);
+        BuildMI(*MBB, Cur, MI->getDebugLoc(), TII->get(Hexagon::TFRI_cNotPt),
+                DestReg).addReg(PredReg).addImm(Immed2);
+        MBB->erase(Cur);
+        Changed = true;
+      }
+    }
+  }
+
+  return Changed;
+}
+
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+/// createHexagonSplitTFRCondSets - Factory for the HexagonSplitTFRCondSets
+/// pass, bound to the given target machine.
+FunctionPass *llvm::createHexagonSplitTFRCondSets(HexagonTargetMachine &TM) {
+  FunctionPass *Pass = new HexagonSplitTFRCondSets(TM);
+  return Pass;
+}
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
new file mode 100644
index 0000000..83fb498
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -0,0 +1,59 @@
+//===- HexagonSubtarget.cpp - Hexagon Subtarget Information ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Hexagon specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonSubtarget.h"
+#include "Hexagon.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+#define GET_SUBTARGETINFO_CTOR
+#define GET_SUBTARGETINFO_TARGET_DESC
+#include "HexagonGenSubtargetInfo.inc"
+
+static cl::opt<bool>
+EnableV3("enable-hexagon-v3", cl::Hidden,
+ cl::desc("Enable Hexagon V3 instructions."));
+
+static cl::opt<bool>
+EnableMemOps(
+ "enable-hexagon-memops",
+ cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed,
+ cl::desc("Generate V4 MEMOP in code generation for Hexagon target"));
+
+/// Constructor - Starts from architecture version V1, lets the generated
+/// ParseSubtargetFeatures() update it from the CPU and feature strings, and
+/// then derives the remaining state. A version other than V2, V3 or V4 after
+/// parsing is treated as unreachable.
+HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS)
+  : HexagonGenSubtargetInfo(TT, CPU, FS),
+    HexagonArchVersion(V1),
+    CPUString(CPU.str()) {
+  ParseSubtargetFeatures(CPU, FS);
+
+  switch (HexagonArchVersion) {
+  case HexagonSubtarget::V2:
+  case HexagonSubtarget::V4:
+    break;
+  case HexagonSubtarget::V3:
+    // Selecting a V3 CPU switches on the -enable-hexagon-v3 option.
+    EnableV3 = true;
+    break;
+  default:
+    llvm_unreachable("Unknown Architecture Version.");
+  }
+
+  // Initialize scheduling itinerary for the specified CPU.
+  InstrItins = getInstrItineraryForCPU(CPUString);
+
+  // Mirror the -enable-hexagon-memops command line option.
+  UseMemOps = EnableMemOps;
+}
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
new file mode 100644
index 0000000..6de85df
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -0,0 +1,74 @@
+//==-- HexagonSubtarget.h - Define Subtarget for the Hexagon ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Hexagon specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef Hexagon_SUBTARGET_H
+#define Hexagon_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "HexagonGenSubtargetInfo.inc"
+
+#define Hexagon_SMALL_DATA_THRESHOLD 8
+
+namespace llvm {
+
+/// HexagonSubtarget - Hexagon subtarget description: the selected
+/// architecture version, the CPU name, and the scheduling itineraries.
+class HexagonSubtarget : public HexagonGenSubtargetInfo {
+
+ // Set by the constructor from the -enable-hexagon-memops option.
+ bool UseMemOps;
+
+public:
+ enum HexagonArchEnum {
+ V1, V2, V3, V4
+ };
+
+ HexagonArchEnum HexagonArchVersion;
+ std::string CPUString;
+ InstrItineraryData InstrItins;
+
+public:
+ HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS);
+
+ /// getInstrItineraryData - Return the instruction itineraries based on
+ /// subtarget selection.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ // Architecture queries. The has*Ops() forms are true for the named version
+ // and every later one; the *Only() forms are true for exactly that version.
+ bool hasV2TOps () const { return HexagonArchVersion >= V2; }
+ bool hasV2TOpsOnly () const { return HexagonArchVersion == V2; }
+ bool hasV3TOps () const { return HexagonArchVersion >= V3; }
+ bool hasV3TOpsOnly () const { return HexagonArchVersion == V3; }
+ bool hasV4TOps () const { return HexagonArchVersion >= V4; }
+ // MEMOPs need both a V4 (or later) architecture and the UseMemOps flag.
+ bool useMemOps () const { return HexagonArchVersion >= V4 && UseMemOps; }
+
+ bool isSubtargetV2() const { return HexagonArchVersion == V2;}
+ const std::string &getCPUString () const { return CPUString; }
+
+ // Threshold for small data section
+ unsigned getSmallDataThreshold() const {
+ return Hexagon_SMALL_DATA_THRESHOLD;
+ }
+ const HexagonArchEnum &getHexagonArchVersion() const {
+ return HexagonArchVersion;
+ }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
new file mode 100644
index 0000000..b29e92c
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -0,0 +1,118 @@
+//===-- HexagonTargetMachine.cpp - Define TargetMachine for Hexagon -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonTargetMachine.h"
+#include "Hexagon.h"
+#include "HexagonISelLowering.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/TargetRegistry.h"
+#include <iostream>
+
+using namespace llvm;
+
+// Hidden command-line switch. When set, addPreRegAlloc() and addPreEmitPass()
+// leave the hardware-loop passes out of the pipeline.
+static cl::opt<bool>
+DisableHardwareLoops("disable-hexagon-hwloops", cl::Hidden,
+                     cl::desc("Disable Hardware Loops for Hexagon target"));
+
+/// HexagonTargetMachineModule - Dummy symbol that gives other code something
+/// to reference in this library. Some hosts cannot link in a library unless
+/// there are references into it; in particular, it seems that it is not
+/// possible to get things to work on Win32 without this. Though it is
+/// unused, do not remove it.
+extern "C" int HexagonTargetMachineModule;
+int HexagonTargetMachineModule = 0;
+
+/// LLVMInitializeHexagonTarget - C-linkage entry point that registers
+/// HexagonTargetMachine as the target machine for TheHexagonTarget.
+extern "C" void LLVMInitializeHexagonTarget() {
+ // Register the target.
+ RegisterTargetMachine<HexagonTargetMachine> X(TheHexagonTarget);
+}
+
+
+/// HexagonTargetMachine ctor - Create an ILP32 architecture model.
+///
+/// The target data string handed to DataLayout starts with "e" and
+/// "p:32:32:32", i.e. little-endian with 32-bit pointers (see the LLVM data
+/// layout documentation for the remaining fields).
+///
+/// Subtarget is listed ahead of InstrInfo, FrameLowering and InstrItins
+/// because each of those is constructed from it.
+///
+
+/// Hexagon_TODO: Do I need an aggregate alignment?
+///
+HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ TargetOptions Options,
+ Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ DataLayout("e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-a0:0") ,
+ Subtarget(TT, CPU, FS), TLInfo(*this), InstrInfo(Subtarget),
+ TSInfo(*this),
+ FrameLowering(Subtarget),
+ InstrItins(&Subtarget.getInstrItineraryData()) {
+ // NOTE(review): presumably turns off CFI directive use in MC output for
+ // this target -- confirm against setMCUseCFI's documentation.
+ setMCUseCFI(false);
+}
+
+// addPassesForOptimizations - Allow the backend (target) to add Target
+// Independent Optimization passes to the Pass Manager.
+//
+// Passes are queued in this order: constant propagation, loop simplify,
+// dead code elimination, constant propagation again, loop unrolling, and
+// loop strength reduction (which is handed this target's lowering info).
+// Always returns true.
+bool HexagonTargetMachine::addPassesForOptimizations(PassManagerBase &PM) {
+
+ PM.add(createConstantPropagationPass());
+ PM.add(createLoopSimplifyPass());
+ PM.add(createDeadCodeEliminationPass());
+ PM.add(createConstantPropagationPass());
+ PM.add(createLoopUnrollPass());
+ PM.add(createLoopStrengthReducePass(getTargetLowering()));
+ return true;
+}
+
+// addInstSelector - Queue the Hexagon extend-op removal pass followed by the
+// Hexagon DAG instruction selector. Always returns false.
+// NOTE(review): the meaning of the return value is defined by the
+// LLVMTargetMachine hook, not here -- confirm before changing it.
+bool HexagonTargetMachine::addInstSelector(PassManagerBase &PM) {
+ PM.add(createHexagonRemoveExtendOps(*this));
+ PM.add(createHexagonISelDag(*this));
+ return false;
+}
+
+
+// addPreRegAlloc - Queue the hardware-loop pass unless it was disabled with
+// -disable-hexagon-hwloops. Returns false whether or not a pass was added.
+bool HexagonTargetMachine::addPreRegAlloc(PassManagerBase &PM) {
+ if (!DisableHardwareLoops) {
+ PM.add(createHexagonHardwareLoops());
+ }
+
+ return false;
+}
+
+// addPostRegAlloc - Queue the Hexagon CFG optimizer. Always returns true.
+bool HexagonTargetMachine::addPostRegAlloc(PassManagerBase &PM) {
+ PM.add(createHexagonCFGOptimizer(*this));
+ return true;
+}
+
+
+// addPreSched2 - Queue the generic if-converter pass. Always returns true.
+bool HexagonTargetMachine::addPreSched2(PassManagerBase &PM) {
+ PM.add(createIfConverterPass());
+ return true;
+}
+
+// addPreEmitPass - Queue the last Hexagon passes: the hardware-loop fixup
+// (skipped under -disable-hexagon-hwloops), predicate spill expansion, and
+// the TFRcondset splitter, in that order. Always returns false.
+bool HexagonTargetMachine::addPreEmitPass(PassManagerBase &PM) {
+
+ if (!DisableHardwareLoops) {
+ PM.add(createHexagonFixupHwLoops());
+ }
+
+ // Expand Spill code for predicate registers.
+ PM.add(createHexagonExpandPredSpillCode(*this));
+
+ // Split up TFRcondsets into conditional transfers.
+ PM.add(createHexagonSplitTFRCondSets(*this));
+
+ return false;
+}
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
new file mode 100644
index 0000000..e27d3ae
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -0,0 +1,86 @@
+//=-- HexagonTargetMachine.h - Define TargetMachine for Hexagon ---*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Hexagon specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonTARGETMACHINE_H
+#define HexagonTARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonISelLowering.h"
+#include "HexagonSelectionDAGInfo.h"
+#include "HexagonFrameLowering.h"
+
+namespace llvm {
+
+class Module;
+
+/// HexagonTargetMachine - Owns the per-target objects for Hexagon (data
+/// layout, subtarget, lowering, instruction info, selection DAG info, frame
+/// lowering) and exposes them through the TargetMachine accessors.
+class HexagonTargetMachine : public LLVMTargetMachine {
+ // Members are constructed in declaration order; Subtarget must stay ahead
+ // of the members below that are built from it.
+ const TargetData DataLayout; // Calculates type size & alignment.
+ HexagonSubtarget Subtarget;
+ HexagonTargetLowering TLInfo;
+ HexagonInstrInfo InstrInfo;
+ HexagonSelectionDAGInfo TSInfo;
+ HexagonFrameLowering FrameLowering;
+ // Non-owning pointer; see getInstrItineraryData().
+ const InstrItineraryData* InstrItins;
+
+public:
+ HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU,
+ StringRef FS, TargetOptions Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL);
+
+ virtual const HexagonInstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+ virtual const HexagonSubtarget *getSubtargetImpl() const {
+ return &Subtarget;
+ }
+ // The register info object is owned by InstrInfo rather than stored here.
+ virtual const HexagonRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const InstrItineraryData* getInstrItineraryData() const {
+ return InstrItins;
+ }
+
+
+ virtual const HexagonTargetLowering* getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const HexagonFrameLowering* getFrameLowering() const {
+ return &FrameLowering;
+ }
+
+ virtual const HexagonSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ // NOTE(review): declared here, but no definition is visible alongside the
+ // other members -- confirm it is defined somewhere or remove it.
+ static unsigned getModuleMatchQuality(const Module &M);
+
+ // Pass Pipeline Configuration.
+ virtual bool addPassesForOptimizations(PassManagerBase &PM);
+ virtual bool addInstSelector(PassManagerBase &PM);
+ virtual bool addPreEmitPass(PassManagerBase &PM);
+ virtual bool addPreRegAlloc(llvm::PassManagerBase &PM);
+ virtual bool addPostRegAlloc(PassManagerBase &PM);
+ virtual bool addPreSched2(PassManagerBase &PM);
+};
+
+extern bool flag_aligned_memcpy;
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
new file mode 100644
index 0000000..188337d
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -0,0 +1,94 @@
+//===-- HexagonTargetObjectFile.cpp - Hexagon asm properties ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of HexagonTargetObjectFile.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/CommandLine.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetObjectFile.h"
+#include "HexagonTargetMachine.h"
+
+using namespace llvm;
+
+// Hidden option giving the largest allocation size that IsInSmallSection()
+// still treats as small-data. Defaults to 8.
+static cl::opt<int> SmallDataThreshold("hexagon-small-data-threshold",
+ cl::init(8), cl::Hidden);
+
+/// Initialize - Run the generic ELF initialization, then create the two
+/// small-data sections used by this target: ".sdata" (PROGBITS) and ".sbss"
+/// (NOBITS), both writable and allocated.
+void HexagonTargetObjectFile::Initialize(MCContext &Ctx,
+                                         const TargetMachine &TM) {
+  TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
+  // Both sections share the same ELF section flags.
+  const unsigned WritableAlloc = ELF::SHF_WRITE | ELF::SHF_ALLOC;
+  MCContext &Context = getContext();
+
+  SmallDataSection = Context.getELFSection(".sdata", ELF::SHT_PROGBITS,
+                                           WritableAlloc,
+                                           SectionKind::getDataRel());
+  SmallBSSSection = Context.getELFSection(".sbss", ELF::SHT_NOBITS,
+                                          WritableAlloc,
+                                          SectionKind::getBSS());
+}
+
+// sdata/sbss support taken largely from the MIPS Backend.
+//
+// IsInSmallSection - Return true if an object of Size bytes is non-empty and
+// no larger than the -hexagon-small-data-threshold value.
+//
+// The option is a signed int. A negative setting is rejected up front:
+// casting it straight to uint64_t would wrap around to a huge limit and
+// classify every sized object as small data.
+static bool IsInSmallSection(uint64_t Size) {
+  const int Threshold = SmallDataThreshold;
+  if (Threshold <= 0)
+    return false;
+  return Size > 0 && Size <= static_cast<uint64_t>(Threshold);
+}
+/// IsGlobalInSmallSection - Decide whether GV belongs in the small data/bss
+/// section, classifying it with getKindForGlobal(). Globals that are not
+/// emitted from this translation unit are never placed there.
+bool HexagonTargetObjectFile::IsGlobalInSmallSection(const GlobalValue *GV,
+                                                const TargetMachine &TM) const {
+  // A declaration has its primary definition elsewhere, and an
+  // available_externally global may be inspected but is not emitted.
+  const bool NotEmittedHere =
+    GV->isDeclaration() || GV->hasAvailableExternallyLinkage();
+
+  // Otherwise defer to the three-argument overload, using the section kind
+  // the global would normally get.
+  return !NotEmittedHere &&
+         IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM));
+}
+
+/// IsGlobalInSmallSection - Decide whether GV, already classified as Kind,
+/// belongs in the small data/bss section. Only global variables whose kind
+/// is BSS, no-reloc data or common qualify, and then only if the allocation
+/// size of their value type passes IsInSmallSection().
+bool HexagonTargetObjectFile::
+IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
+                       SectionKind Kind) const {
+  // Functions and other non-variable globals never qualify.
+  if (!isa<GlobalVariable>(GV))
+    return false;
+
+  const bool EligibleKind =
+    Kind.isBSS() || Kind.isDataNoRel() || Kind.isCommon();
+  if (!EligibleKind)
+    return false;
+
+  // GV's own type is a pointer; measure the type it points to.
+  Type *ValueTy = GV->getType()->getElementType();
+  uint64_t AllocSize = TM.getTargetData()->getTypeAllocSize(ValueTy);
+  return IsInSmallSection(AllocSize);
+}
+
+/// SelectSectionForGlobal - Route small BSS globals to SmallBSSSection and
+/// small no-reloc data globals to SmallDataSection; everything else gets
+/// the generic ELF section choice.
+const MCSection *HexagonTargetObjectFile::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+                       Mangler *Mang, const TargetMachine &TM) const {
+  // Handle Small Section classification here. Only BSS and no-reloc data
+  // kinds are candidates for the small sections.
+  if (Kind.isBSS() || Kind.isDataNoRel()) {
+    if (IsGlobalInSmallSection(GV, TM, Kind))
+      return Kind.isBSS() ? SmallBSSSection : SmallDataSection;
+  }
+
+  // Otherwise, we work the same as ELF.
+  return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,
+                                                             TM);
+}
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.h b/lib/Target/Hexagon/HexagonTargetObjectFile.h
new file mode 100644
index 0000000..101c1f2
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.h
@@ -0,0 +1,40 @@
+//===-- HexagonTargetObjectFile.h - Hexagon asm properties ------*- C++ -*--==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonTARGETOBJECTFILE_H
+#define HexagonTARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCSectionELF.h"
+
+namespace llvm {
+
+ /// HexagonTargetObjectFile - ELF object-file lowering for Hexagon that adds
+ /// small data (.sdata) and small bss (.sbss) section selection.
+ class HexagonTargetObjectFile : public TargetLoweringObjectFileELF {
+ // Both section pointers are assigned in Initialize().
+ const MCSectionELF *SmallDataSection;
+ const MCSectionELF *SmallBSSSection;
+ public:
+ virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+ /// IsGlobalInSmallSection - Return true if this global address should be
+ /// placed into small data/bss section.
+ /// The three-argument form takes an already-computed section kind; the
+ /// two-argument form computes the kind itself.
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM,
+ SectionKind Kind) const;
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const;
+
+ /// SelectSectionForGlobal - Pick the output section for GV, given its
+ /// section kind.
+ const MCSection* SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind,
+ Mangler *Mang,
+ const TargetMachine &TM) const;
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonVarargsCallingConvention.h b/lib/Target/Hexagon/HexagonVarargsCallingConvention.h
new file mode 100644
index 0000000..21b2d67
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonVarargsCallingConvention.h
@@ -0,0 +1,141 @@
+//==-- HexagonVarargsCallingConvention.h - Calling Conventions ---*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the functions that assign locations to outgoing function
+// arguments. Adapted from the target independent version but this handles
+// calls to varargs functions
+//
+//===----------------------------------------------------------------------===//
+//
+
+
+
+
+// Forward declaration; the definition follows CC_Hexagon32_VarArgs below.
+static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags,
+ Hexagon_CCState &State,
+ int NonVarArgsParams,
+ int CurrentParam,
+ bool ForceMem);
+
+
+/// CC_Hexagon32_VarArgs - Assign a location (register or stack slot) to one
+/// outgoing call argument and record it in State.
+///
+/// \param NonVarArgsParams  Bound used to tell named arguments from variadic
+///                          ones; -1 disables the check so every argument is
+///                          a register candidate.
+/// \param CurrentParam      Position of this argument; it is a register
+///                          candidate only while CurrentParam <=
+///                          NonVarArgsParams.
+/// \param ForceMem          When true, skip register assignment entirely.
+/// \returns false on every path.
+static bool CC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags,
+ Hexagon_CCState &State,
+ int NonVarArgsParams,
+ int CurrentParam,
+ bool ForceMem) {
+ // ByValSize is only assigned when the argument is byval (the assignment
+ // sits inside the condition). Byval aggregates larger than an i64
+ // (8 bytes) are always passed in memory.
+ unsigned ByValSize = 0;
+ if (ArgFlags.isByVal() &&
+ ((ByValSize = ArgFlags.getByValSize()) >
+ (MVT(MVT::i64).getSizeInBits() / 8))) {
+ ForceMem = true;
+ }
+
+
+ // Only assign registers for named (non varargs) arguments
+ if ( !ForceMem && ((NonVarArgsParams == -1) || (CurrentParam <=
+ NonVarArgsParams))) {
+
+ // 32-bit and narrower values take the next free register of R0-R5.
+ if (LocVT == MVT::i32 ||
+ LocVT == MVT::i16 ||
+ LocVT == MVT::i8 ||
+ LocVT == MVT::f32) {
+ static const unsigned RegList1[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+ Hexagon::R5
+ };
+ if (unsigned Reg = State.AllocateReg(RegList1, 6)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+ }
+ }
+
+ // 64-bit values take the next free register of D0-D2.
+ if (LocVT == MVT::i64 ||
+ LocVT == MVT::f64) {
+ static const unsigned RegList2[] = {
+ Hexagon::D0, Hexagon::D1, Hexagon::D2
+ };
+ if (unsigned Reg = State.AllocateReg(RegList2, 3)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+ }
+ }
+ }
+
+ // No register was assigned: fall back to a stack slot sized and aligned
+ // for LocVT's IR type.
+ const Type* ArgTy = LocVT.getTypeForEVT(State.getContext());
+ unsigned Alignment =
+ State.getTarget().getTargetData()->getABITypeAlignment(ArgTy);
+ unsigned Size =
+ State.getTarget().getTargetData()->getTypeSizeInBits(ArgTy) / 8;
+
+ // If it's passed by value, then we need the size of the aggregate not of
+ // the pointer.
+ if (ArgFlags.isByVal()) {
+ Size = ByValSize;
+
+ // Hexagon_TODO: Get the alignment of the contained type here.
+ Alignment = 8;
+ }
+
+ unsigned Offset3 = State.AllocateStack(Size, Alignment);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+}
+
+
+/// RetCC_Hexagon32_VarArgs - Assign a location to one return value and
+/// record it in State. 32-bit values try R0-R5 and 64-bit values try D0-D2;
+/// anything that gets no register is given a stack slot. ArgFlags,
+/// NonVarArgsParams, CurrentParam and ForceMem are accepted but not
+/// consulted. Returns false on every path.
+static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
+                                    EVT LocVT, CCValAssign::LocInfo LocInfo,
+                                    ISD::ArgFlagsTy ArgFlags,
+                                    Hexagon_CCState &State,
+                                    int NonVarArgsParams,
+                                    int CurrentParam,
+                                    bool ForceMem) {
+  static const unsigned WordRegs[] = {
+    Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+    Hexagon::R5
+  };
+  static const unsigned PairRegs[] = {
+    Hexagon::D0, Hexagon::D1, Hexagon::D2
+  };
+
+  // Try the register list that matches the value's width. A result of 0
+  // means the type has no register class here or the list is exhausted.
+  unsigned AssignedReg = 0;
+  if (LocVT == MVT::i32 || LocVT == MVT::f32)
+    AssignedReg = State.AllocateReg(WordRegs, 6);
+  else if (LocVT == MVT::i64 || LocVT == MVT::f64)
+    AssignedReg = State.AllocateReg(PairRegs, 3);
+
+  if (AssignedReg) {
+    State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), AssignedReg,
+                                     LocVT.getSimpleVT(), LocInfo));
+    return false;
+  }
+
+  // Fall back to a stack slot sized and aligned for LocVT's IR type.
+  const Type *LocTy = LocVT.getTypeForEVT(State.getContext());
+  const TargetData *TD = State.getTarget().getTargetData();
+  unsigned SlotAlign = TD->getABITypeAlignment(LocTy);
+  unsigned SlotSize = TD->getTypeSizeInBits(LocTy) / 8;
+
+  unsigned SlotOffset = State.AllocateStack(SlotSize, SlotAlign);
+  State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), SlotOffset,
+                                   LocVT.getSimpleVT(), LocInfo));
+  return false;
+}
diff --git a/lib/Target/Hexagon/LLVMBuild.txt b/lib/Target/Hexagon/LLVMBuild.txt
new file mode 100644
index 0000000..84ea6a0
--- /dev/null
+++ b/lib/Target/Hexagon/LLVMBuild.txt
@@ -0,0 +1,32 @@
+;===- ./lib/Target/Hexagon/LLVMBuild.txt -----------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = TargetInfo MCTargetDesc
+
+[component_0]
+type = TargetGroup
+name = Hexagon
+parent = Target
+has_asmprinter = 1
+
+[component_1]
+type = Library
+name = HexagonCodeGen
+parent = Hexagon
+required_libraries = AsmPrinter CodeGen Core HexagonInfo SelectionDAG Support Target MC HexagonDesc
+add_to_library_groups = Hexagon
diff --git a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 0000000..8e3da99
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_llvm_library(LLVMHexagonDesc
+ HexagonMCTargetDesc.cpp
+ HexagonMCAsmInfo.cpp
+ )
+
+add_dependencies(LLVMHexagonDesc HexagonCommonTableGen)
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
new file mode 100644
index 0000000..188693c
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
@@ -0,0 +1,36 @@
+//===-- HexagonMCAsmInfo.cpp - Hexagon asm properties -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the HexagonMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCAsmInfo.h"
+
+using namespace llvm;
+
+/// HexagonMCAsmInfo ctor - Set the assembler directives and syntax
+/// conventions used when printing Hexagon assembly. The T and TT parameters
+/// are accepted but not consulted.
+HexagonMCAsmInfo::HexagonMCAsmInfo(const Target &T, StringRef TT) {
+  Data16bitsDirective = "\t.half\t";
+  Data32bitsDirective = "\t.word\t";
+  Data64bitsDirective = 0;  // No 64-bit data directive is provided.
+  CommentString = "//";
+  HasLEB128 = true;
+
+  PrivateGlobalPrefix = ".L";
+  LCOMMDirectiveType = LCOMM::ByteAlignment;
+  InlineAsmStart = "# InlineAsm Start";
+  InlineAsmEnd = "# InlineAsm End";
+  // ZeroDirective is set exactly once, to the value that was in effect when
+  // the constructor returned.
+  ZeroDirective = "\t.space\t";
+  AscizDirective = "\t.string\t";
+  WeakRefDirective = "\t.weak\t";
+
+  UsesELFSectionDirectiveForBSS = true;
+  ExceptionsType = ExceptionHandling::DwarfCFI;
+}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
new file mode 100644
index 0000000..8196e95
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
@@ -0,0 +1,30 @@
+//===-- HexagonMCAsmInfo.h - Hexagon asm properties -------------*- C++ -*--==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the HexagonMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonMCASMINFO_H
+#define HexagonMCASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ /// HexagonMCAsmInfo - MCAsmInfo subclass whose constructor fills in the
+ /// Hexagon assembler settings.
+ class HexagonMCAsmInfo : public MCAsmInfo {
+ public:
+ explicit HexagonMCAsmInfo(const Target &T, StringRef TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
new file mode 100644
index 0000000..625f07c
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -0,0 +1,94 @@
+//===-- HexagonMCTargetDesc.cpp - Hexagon Target Descriptions ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Hexagon specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCTargetDesc.h"
+#include "HexagonMCAsmInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "HexagonGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "HexagonGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "HexagonGenRegisterInfo.inc"
+
+using namespace llvm;
+
+/// createHexagonMCInstrInfo - Allocate an MCInstrInfo and fill it in with
+/// the tblgen-generated initializer. The caller receives the new object.
+static MCInstrInfo *createHexagonMCInstrInfo() {
+  MCInstrInfo *InstrInfo = new MCInstrInfo();
+  InitHexagonMCInstrInfo(InstrInfo);
+  return InstrInfo;
+}
+
+/// createHexagonMCRegisterInfo - Allocate an MCRegisterInfo and initialize
+/// it, passing Hexagon::R0 to the generated initializer. TT is not
+/// consulted.
+static MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT) {
+  MCRegisterInfo *RegInfo = new MCRegisterInfo();
+  InitHexagonMCRegisterInfo(RegInfo, Hexagon::R0);
+  return RegInfo;
+}
+
+/// createHexagonMCSubtargetInfo - Allocate an MCSubtargetInfo and initialize
+/// it from the triple, CPU name and feature string.
+static MCSubtargetInfo *createHexagonMCSubtargetInfo(StringRef TT,
+                                                     StringRef CPU,
+                                                     StringRef FS) {
+  MCSubtargetInfo *STI = new MCSubtargetInfo();
+  InitHexagonMCSubtargetInfo(STI, TT, CPU, FS);
+  return STI;
+}
+
+/// createHexagonMCAsmInfo - Build the Hexagon asm info and record the
+/// initial frame state on it before handing it back.
+static MCAsmInfo *createHexagonMCAsmInfo(const Target &T, StringRef TT) {
+  MCAsmInfo *AsmInfo = new HexagonMCAsmInfo(T, TT);
+
+  // Initial frame state: VirtualFP = (R30 + #0).
+  MachineLocation FrameDst(MachineLocation::VirtualFP);
+  MachineLocation FrameSrc(Hexagon::R30, 0);
+  AsmInfo->addInitialFrameState(0, FrameDst, FrameSrc);
+
+  return AsmInfo;
+}
+
+/// createHexagonMCCodeGenInfo - Allocate and initialize the codegen info.
+/// The requested relocation model RM is deliberately ignored (TT is unused
+/// as well): the object is always initialized with Reloc::Static.
+static MCCodeGenInfo *createHexagonMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+                                                 CodeModel::Model CM,
+                                                 CodeGenOpt::Level OL) {
+  MCCodeGenInfo *CodeGenInfo = new MCCodeGenInfo();
+  // For the time being, use static relocations, since there's really no
+  // support for PIC yet.
+  CodeGenInfo->InitMCCodeGenInfo(Reloc::Static, CM, OL);
+  return CodeGenInfo;
+}
+
+// Force static initialization.
+//
+// LLVMInitializeHexagonTargetMC - C-linkage entry point that registers the
+// factory functions defined above (asm info, codegen info, instruction
+// info, register info, subtarget info) for TheHexagonTarget.
+extern "C" void LLVMInitializeHexagonTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(TheHexagonTarget, createHexagonMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheHexagonTarget,
+ createHexagonMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheHexagonTarget, createHexagonMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheHexagonTarget,
+ createHexagonMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheHexagonTarget,
+ createHexagonMCSubtargetInfo);
+}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
new file mode 100644
index 0000000..364841f
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -0,0 +1,40 @@
+//===-- HexagonMCTargetDesc.h - Hexagon Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Hexagon specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+// The include guard is named after this header so that it cannot collide
+// with another target's MCTargetDesc.h guard.
+#ifndef HexagonMCTARGETDESC_H
+#define HexagonMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheHexagonTarget;
+
+} // End llvm namespace
+
+// Define symbolic names for Hexagon registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "HexagonGenRegisterInfo.inc"
+
+// Defines symbolic names for the Hexagon instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "HexagonGenInstrInfo.inc"
+
+// Defines symbolic names for the Hexagon subtarget features.
+//
+#define GET_SUBTARGETINFO_ENUM
+#include "HexagonGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 0000000..1114d99
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = HexagonDesc
+parent = Hexagon
+required_libraries = HexagonInfo MC
+add_to_library_groups = Hexagon
diff --git a/lib/Target/Hexagon/MCTargetDesc/Makefile b/lib/Target/Hexagon/MCTargetDesc/Makefile
new file mode 100644
index 0000000..67be2bc
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/Hexagon/MCTargetDesc/Makefile ------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMHexagonDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Hexagon/Makefile b/lib/Target/Hexagon/Makefile
new file mode 100644
index 0000000..c936e92
--- /dev/null
+++ b/lib/Target/Hexagon/Makefile
@@ -0,0 +1,23 @@
+##===- lib/Target/Hexagon/Makefile -------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMHexagonCodeGen
+TARGET = Hexagon
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = HexagonGenRegisterInfo.inc \
+ HexagonGenInstrInfo.inc \
+ HexagonGenAsmWriter.inc \
+ HexagonGenDAGISel.inc HexagonGenSubtargetInfo.inc \
+ HexagonGenCallingConv.inc \
+ HexagonAsmPrinter.cpp
+
+DIRS = TargetInfo MCTargetDesc
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Hexagon/TargetInfo/CMakeLists.txt b/lib/Target/Hexagon/TargetInfo/CMakeLists.txt
new file mode 100644
index 0000000..5b04a30
--- /dev/null
+++ b/lib/Target/Hexagon/TargetInfo/CMakeLists.txt
@@ -0,0 +1,8 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMHexagonInfo
+ HexagonTargetInfo.cpp
+ )
+
+add_dependencies(LLVMHexagonInfo HexagonCommonTableGen)
diff --git a/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
new file mode 100644
index 0000000..7aa5dd3
--- /dev/null
+++ b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- HexagonTargetInfo.cpp - Hexagon Target Implementation ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "llvm/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheHexagonTarget;
+
+/// LLVMInitializeHexagonTargetInfo - C-linkage entry point that registers
+/// TheHexagonTarget for Triple::hexagon under the name "hexagon", with JIT
+/// support turned off.
+extern "C" void LLVMInitializeHexagonTargetInfo() {
+  RegisterTarget<Triple::hexagon, /*HasJIT=*/false>
+    Registration(TheHexagonTarget, "hexagon", "Hexagon");
+}
diff --git a/lib/Target/Hexagon/TargetInfo/LLVMBuild.txt b/lib/Target/Hexagon/TargetInfo/LLVMBuild.txt
new file mode 100644
index 0000000..7b87be3
--- /dev/null
+++ b/lib/Target/Hexagon/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Hexagon/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = HexagonInfo
+parent = Hexagon
+required_libraries = MC Support
+add_to_library_groups = Hexagon
diff --git a/lib/Target/Hexagon/TargetInfo/Makefile b/lib/Target/Hexagon/TargetInfo/Makefile
new file mode 100644
index 0000000..494cca1
--- /dev/null
+++ b/lib/Target/Hexagon/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/Hexagon/TargetInfo/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMHexagonInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt
index 358cbc8..5a42ca5 100644
--- a/lib/Target/LLVMBuild.txt
+++ b/lib/Target/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = ARM CBackend CellSPU CppBackend Hexagon MBlaze MSP430 Mips PTX PowerPC Sparc X86 XCore
+
; This is a special group whose required libraries are extended (by llvm-build)
; with the best execution engine (the native JIT, if available, or the
; interpreter).
diff --git a/lib/Target/MBlaze/AsmParser/CMakeLists.txt b/lib/Target/MBlaze/AsmParser/CMakeLists.txt
index ec8f52a..813767b 100644
--- a/lib/Target/MBlaze/AsmParser/CMakeLists.txt
+++ b/lib/Target/MBlaze/AsmParser/CMakeLists.txt
@@ -6,11 +6,4 @@ add_llvm_library(LLVMMBlazeAsmParser
MBlazeAsmParser.cpp
)
-add_llvm_library_dependencies(LLVMMBlazeAsmParser
- LLVMMBlazeInfo
- LLVMMC
- LLVMMCParser
- LLVMSupport
- )
-
add_dependencies(LLVMMBlazeAsmParser MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/AsmParser/LLVMBuild.txt b/lib/Target/MBlaze/AsmParser/LLVMBuild.txt
index 2c61a7f..b10189a 100644
--- a/lib/Target/MBlaze/AsmParser/LLVMBuild.txt
+++ b/lib/Target/MBlaze/AsmParser/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MBlazeAsmParser
parent = MBlaze
required_libraries = MBlazeInfo MC MCParser Support
add_to_library_groups = MBlaze
-
diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt
index d3f1383..71095e5 100644
--- a/lib/Target/MBlaze/CMakeLists.txt
+++ b/lib/Target/MBlaze/CMakeLists.txt
@@ -29,19 +29,6 @@ add_llvm_target(MBlazeCodeGen
MBlazeELFWriterInfo.cpp
)
-add_llvm_library_dependencies(LLVMMBlazeCodeGen
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMBlazeAsmPrinter
- LLVMMBlazeDesc
- LLVMMBlazeInfo
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(AsmParser)
add_subdirectory(Disassembler)
add_subdirectory(InstPrinter)
diff --git a/lib/Target/MBlaze/Disassembler/CMakeLists.txt b/lib/Target/MBlaze/Disassembler/CMakeLists.txt
index e0a53ee..be2dce1 100644
--- a/lib/Target/MBlaze/Disassembler/CMakeLists.txt
+++ b/lib/Target/MBlaze/Disassembler/CMakeLists.txt
@@ -13,11 +13,4 @@ set_property(
)
endif()
-add_llvm_library_dependencies(LLVMMBlazeDisassembler
- LLVMMBlazeDesc
- LLVMMBlazeInfo
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMMBlazeDisassembler MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/Disassembler/LLVMBuild.txt b/lib/Target/MBlaze/Disassembler/LLVMBuild.txt
index c5c4f80..28dd9dc 100644
--- a/lib/Target/MBlaze/Disassembler/LLVMBuild.txt
+++ b/lib/Target/MBlaze/Disassembler/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MBlazeDisassembler
parent = MBlaze
required_libraries = MBlazeDesc MBlazeInfo MC Support
add_to_library_groups = MBlaze
-
diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
index 3087317..ccc3a05 100644
--- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
+++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
@@ -123,6 +123,7 @@ static unsigned decodeSEXT(uint32_t insn) {
case 0x41: return MBlaze::SRL;
case 0x21: return MBlaze::SRC;
case 0x01: return MBlaze::SRA;
+ case 0xE0: return MBlaze::CLZ;
}
}
@@ -176,6 +177,13 @@ static unsigned decodeBR(uint32_t insn) {
}
static unsigned decodeBRI(uint32_t insn) {
+ switch (insn&0x3FFFFFF) {
+ default: break;
+ case 0x0020004: return MBlaze::IDMEMBAR;
+ case 0x0220004: return MBlaze::DMEMBAR;
+ case 0x0420004: return MBlaze::IMEMBAR;
+ }
+
switch ((insn>>16)&0x1F) {
default: return UNSUPPORTED;
case 0x00: return MBlaze::BRI;
@@ -531,6 +539,9 @@ MCDisassembler::DecodeStatus MBlazeDisassembler::getInstruction(MCInst &instr,
default:
return Fail;
+ case MBlazeII::FC:
+ break;
+
case MBlazeII::FRRRR:
if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED)
return Fail;
@@ -547,6 +558,13 @@ MCDisassembler::DecodeStatus MBlazeDisassembler::getInstruction(MCInst &instr,
instr.addOperand(MCOperand::CreateReg(RB));
break;
+ case MBlazeII::FRR:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ break;
+
case MBlazeII::FRI:
switch (opcode) {
default:
diff --git a/lib/Target/MBlaze/InstPrinter/CMakeLists.txt b/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
index aff0b3d..586e2d3 100644
--- a/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
+++ b/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
@@ -5,9 +5,4 @@ add_llvm_library(LLVMMBlazeAsmPrinter
MBlazeInstPrinter.cpp
)
-add_llvm_library_dependencies(LLVMMBlazeAsmPrinter
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMMBlazeAsmPrinter MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt b/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt
index 7a21f1e..3a21a05 100644
--- a/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MBlazeAsmPrinter
parent = MBlaze
required_libraries = MC Support
add_to_library_groups = MBlaze
-
diff --git a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
index 570ab08..5297563 100644
--- a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
+++ b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
@@ -1,4 +1,4 @@
-//===-- MBLazeInstPrinter.h - Convert MBlaze MCInst to assembly syntax ----===//
+//===-- MBlazeInstPrinter.h - Convert MBlaze MCInst to assembly syntax ----===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/LLVMBuild.txt b/lib/Target/MBlaze/LLVMBuild.txt
index f1a3f5d..0b29007 100644
--- a/lib/Target/MBlaze/LLVMBuild.txt
+++ b/lib/Target/MBlaze/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = MBlaze
@@ -29,4 +32,3 @@ name = MBlazeCodeGen
parent = MBlaze
required_libraries = AsmPrinter CodeGen Core MBlazeAsmPrinter MBlazeDesc MBlazeInfo MC SelectionDAG Support Target
add_to_library_groups = MBlaze
-
diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
index ff051e3..c751dd8 100644
--- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
@@ -310,9 +310,9 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
// Check if the last terminator is an unconditional branch.
MachineBasicBlock::const_iterator I = Pred->end();
- while (I != Pred->begin() && !(--I)->getDesc().isTerminator())
+ while (I != Pred->begin() && !(--I)->isTerminator())
; // Noop
- return I == Pred->end() || !I->getDesc().isBarrier();
+ return I == Pred->end() || !I->isBarrier();
}
// Force static initialization.
diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
index c07570a..19e787d 100644
--- a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
+++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
@@ -29,13 +29,11 @@ using namespace llvm;
STATISTIC(FilledSlots, "Number of delay slots filled");
-namespace llvm {
-cl::opt<bool> DisableDelaySlotFiller(
+static cl::opt<bool> MBDisableDelaySlotFiller(
"disable-mblaze-delay-filler",
cl::init(false),
cl::desc("Disable the MBlaze delay slot filter."),
cl::Hidden);
-}
namespace {
struct Filler : public MachineFunctionPass {
@@ -109,7 +107,6 @@ static bool delayHasHazard(MachineBasicBlock::iterator &candidate,
// Hazard check
MachineBasicBlock::iterator a = candidate;
MachineBasicBlock::iterator b = slot;
- MCInstrDesc desc = candidate->getDesc();
// MBB layout:-
// candidate := a0 = operation(a1, a2)
@@ -123,7 +120,7 @@ static bool delayHasHazard(MachineBasicBlock::iterator &candidate,
// 4. b0 is one or more of {a1, a2}
// 5. a accesses memory, and the middle bit
// contains a store operation.
- bool a_is_memory = desc.mayLoad() || desc.mayStore();
+ bool a_is_memory = candidate->mayLoad() || candidate->mayStore();
// Determine the number of operands in the slot instruction and in the
// candidate instruction.
@@ -156,7 +153,7 @@ static bool delayHasHazard(MachineBasicBlock::iterator &candidate,
}
// Check hazard type 5
- if (a_is_memory && m->getDesc().mayStore())
+ if (a_is_memory && m->mayStore())
return true;
}
@@ -183,8 +180,8 @@ static bool isDelayFiller(MachineBasicBlock &MBB,
if (candidate == MBB.begin())
return false;
- MCInstrDesc brdesc = (--candidate)->getDesc();
- return (brdesc.hasDelaySlot());
+ --candidate;
+ return (candidate->hasDelaySlot());
}
static bool hasUnknownSideEffects(MachineBasicBlock::iterator &I) {
@@ -211,9 +208,8 @@ findDelayInstr(MachineBasicBlock &MBB,MachineBasicBlock::iterator slot) {
break;
--I;
- MCInstrDesc desc = I->getDesc();
- if (desc.hasDelaySlot() || desc.isBranch() || isDelayFiller(MBB,I) ||
- desc.isCall() || desc.isReturn() || desc.isBarrier() ||
+ if (I->hasDelaySlot() || I->isBranch() || isDelayFiller(MBB,I) ||
+ I->isCall() || I->isReturn() || I->isBarrier() ||
hasUnknownSideEffects(I))
break;
@@ -232,11 +228,11 @@ findDelayInstr(MachineBasicBlock &MBB,MachineBasicBlock::iterator slot) {
bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
- if (I->getDesc().hasDelaySlot()) {
+ if (I->hasDelaySlot()) {
MachineBasicBlock::iterator D = MBB.end();
MachineBasicBlock::iterator J = I;
- if (!DisableDelaySlotFiller)
+ if (!MBDisableDelaySlotFiller)
D = findDelayInstr(MBB,I);
++FilledSlots;
diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/lib/Target/MBlaze/MBlazeFrameLowering.cpp
index f28d5a7..37919bc 100644
--- a/lib/Target/MBlaze/MBlazeFrameLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeFrameLowering.cpp
@@ -32,13 +32,11 @@
using namespace llvm;
-namespace llvm {
- cl::opt<bool> DisableStackAdjust(
- "disable-mblaze-stack-adjust",
- cl::init(false),
- cl::desc("Disable MBlaze stack layout adjustment."),
- cl::Hidden);
-}
+static cl::opt<bool> MBDisableStackAdjust(
+ "disable-mblaze-stack-adjust",
+ cl::init(false),
+ cl::desc("Disable MBlaze stack layout adjustment."),
+ cl::Hidden);
static void replaceFrameIndexes(MachineFunction &MF,
SmallVector<std::pair<int,int64_t>, 16> &FR) {
@@ -85,7 +83,7 @@ static void replaceFrameIndexes(MachineFunction &MF,
//===----------------------------------------------------------------------===//
static void analyzeFrameIndexes(MachineFunction &MF) {
- if (DisableStackAdjust) return;
+ if (MBDisableStackAdjust) return;
MachineFrameInfo *MFI = MF.getFrameInfo();
MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
@@ -336,7 +334,8 @@ int MBlazeFrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI)
// if frame pointer elimination is disabled.
bool MBlazeFrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects();
}
void MBlazeFrameLowering::emitPrologue(MachineFunction &MF) const {
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index 148d906..0002174 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -167,7 +167,9 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
diff --git a/lib/Target/MBlaze/MBlazeInstrFormats.td b/lib/Target/MBlaze/MBlazeInstrFormats.td
index 54f605f..4c6034d 100644
--- a/lib/Target/MBlaze/MBlazeInstrFormats.td
+++ b/lib/Target/MBlaze/MBlazeInstrFormats.td
@@ -35,6 +35,7 @@ def FRIR : Format<17>; // RSUBI
def FRRRR : Format<18>; // RSUB, FRSUB
def FRI : Format<19>; // RSUB, FRSUB
def FC : Format<20>; // NOP
+def FRR : Format<21>; // CLZ
//===----------------------------------------------------------------------===//
// Describe MBlaze instructions format
@@ -202,3 +203,26 @@ class MSR<bits<6> op, bits<6> flags, dag outs, dag ins, string asmstr,
let Inst{11-16} = flags;
let Inst{17-31} = imm15;
}
+
+//===----------------------------------------------------------------------===//
+// TCLZ instruction class in MBlaze : <|opcode|rd|ra|flags|>
+//===----------------------------------------------------------------------===//
+class TCLZ<bits<6> op, bits<16> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op, FRR, outs, ins, asmstr, pattern, itin> {
+ bits<5> rd;
+ bits<5> ra;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = ra;
+ let Inst{16-31} = flags;
+}
+
+//===----------------------------------------------------------------------===//
+// MBAR instruction class in MBlaze : <|opcode|flags|>
+//===----------------------------------------------------------------------===//
+class MBAR<bits<6> op, bits<26> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op, FC, outs, ins, asmstr, pattern, itin> {
+ let Inst{6-31} = flags;
+}
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td
index 1d8c987..9fe2a49 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.td
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.td
@@ -594,9 +594,18 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
//===----------------------------------------------------------------------===//
let neverHasSideEffects = 1 in {
- def NOP : MBlazeInst< 0x20, FC, (outs), (ins), "nop ", [], IIC_ALU>;
+ def NOP : MBlazeInst<0x20, FC, (outs), (ins), "nop ", [], IIC_ALU>;
}
+let Predicates=[HasPatCmp] in {
+ def CLZ : TCLZ<0x24, 0x00E0, (outs GPR:$dst), (ins GPR:$src),
+ "clz $dst, $src", [], IIC_ALU>;
+}
+
+def IMEMBAR : MBAR<0x2E, 0x0420004, (outs), (ins), "mbar 2", [], IIC_ALU>;
+def DMEMBAR : MBAR<0x2E, 0x0220004, (outs), (ins), "mbar 1", [], IIC_ALU>;
+def IDMEMBAR : MBAR<0x2E, 0x0020004, (outs), (ins), "mbar 0", [], IIC_ALU>;
+
let usesCustomInserter = 1 in {
def Select_CC : MBlazePseudo<(outs GPR:$dst),
(ins GPR:$T, GPR:$F, GPR:$CMP, i32imm:$CC), // F T reversed
@@ -751,6 +760,56 @@ def : Pat<(sra GPR:$L, GPR:$R), (ShiftRA GPR:$L, GPR:$R)>;
def : Pat<(srl GPR:$L, GPR:$R), (ShiftRL GPR:$L, GPR:$R)>;
// SET_CC operations
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 1)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETNE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 2)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 3)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETLT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 4)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 5)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETLE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 6)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETUGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU (i32 R0), GPR:$L), 3)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETULT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU (i32 R0), GPR:$L), 4)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETUGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU (i32 R0), GPR:$L), 5)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETULE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU (i32 R0), GPR:$L), 6)>;
+
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 1)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETNE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 2)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 3)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETLT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 4)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 5)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETLE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 6)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETUGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, (i32 R0)), 3)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETULT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, (i32 R0)), 4)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETUGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, (i32 R0)), 5)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETULE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, (i32 R0)), 6)>;
+
def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ),
(Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
(CMP GPR:$R, GPR:$L), 1)>;
@@ -787,6 +846,68 @@ def : Pat<(select (i32 GPR:$C), (i32 GPR:$T), (i32 GPR:$F)),
(Select_CC GPR:$T, GPR:$F, GPR:$C, 2)>;
// SELECT_CC
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETEQ),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 1)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETNE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 2)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETGT),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 3)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETLT),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 4)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETGE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 5)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETLE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 6)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 3)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETULT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 4)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 5)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETULE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 6)>;
+
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETEQ),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 1)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETNE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 2)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETGT),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 3)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETLT),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 4)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETGE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 5)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETLE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 6)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 3)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETULT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 4)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 5)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETULE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 6)>;
+
def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
(i32 GPR:$T), (i32 GPR:$F), SETEQ),
(Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 1)>;
@@ -827,6 +948,48 @@ def : Pat<(br bb:$T), (BRID bb:$T)>;
def : Pat<(brind GPR:$T), (BRAD GPR:$T)>;
// BRCOND instructions
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETEQ), bb:$T),
+ (BEQID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETNE), bb:$T),
+ (BNEID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETGT), bb:$T),
+ (BGTID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETLT), bb:$T),
+ (BLTID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETGE), bb:$T),
+ (BGEID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETLE), bb:$T),
+ (BLEID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETUGT), bb:$T),
+ (BGTID (CMPU (i32 R0), GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETULT), bb:$T),
+ (BLTID (CMPU (i32 R0), GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETUGE), bb:$T),
+ (BGEID (CMPU (i32 R0), GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETULE), bb:$T),
+ (BLEID (CMPU (i32 R0), GPR:$L), bb:$T)>;
+
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETEQ), bb:$T),
+ (BEQID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETNE), bb:$T),
+ (BNEID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETGT), bb:$T),
+ (BGTID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETLT), bb:$T),
+ (BLTID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETGE), bb:$T),
+ (BGEID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETLE), bb:$T),
+ (BLEID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETUGT), bb:$T),
+ (BGTID (CMPU GPR:$R, (i32 R0)), bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETULT), bb:$T),
+ (BLTID (CMPU GPR:$R, (i32 R0)), bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETUGE), bb:$T),
+ (BGEID (CMPU GPR:$R, (i32 R0)), bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETULE), bb:$T),
+ (BLEID (CMPU GPR:$R, (i32 R0)), bb:$T)>;
+
def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ), bb:$T),
(BEQID (CMP GPR:$R, GPR:$L), bb:$T)>;
def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETNE), bb:$T),
@@ -869,11 +1032,11 @@ def : Pat<(store (i32 GPR:$dst), xaddr:$addr), (SW GPR:$dst, xaddr:$addr)>;
def : Pat<(load xaddr:$addr), (i32 (LW xaddr:$addr))>;
// 16-bit load and store
-def : Pat<(truncstorei16 (i32 GPR:$dst), xaddr:$addr), (SH GPR:$dst, xaddr:$addr)>;
+def : Pat<(truncstorei16 (i32 GPR:$dst), xaddr:$ad), (SH GPR:$dst, xaddr:$ad)>;
def : Pat<(zextloadi16 xaddr:$addr), (i32 (LHU xaddr:$addr))>;
// 8-bit load and store
-def : Pat<(truncstorei8 (i32 GPR:$dst), xaddr:$addr), (SB GPR:$dst, xaddr:$addr)>;
+def : Pat<(truncstorei8 (i32 GPR:$dst), xaddr:$ad), (SB GPR:$dst, xaddr:$ad)>;
def : Pat<(zextloadi8 xaddr:$addr), (i32 (LBU xaddr:$addr))>;
// Peepholes
diff --git a/lib/Target/MBlaze/MBlazeMCInstLower.cpp b/lib/Target/MBlaze/MBlazeMCInstLower.cpp
index a7e400b..7e5598f 100644
--- a/lib/Target/MBlaze/MBlazeMCInstLower.cpp
+++ b/lib/Target/MBlaze/MBlazeMCInstLower.cpp
@@ -1,4 +1,4 @@
-//===-- MBLazeMCInstLower.cpp - Convert MBlaze MachineInstr to an MCInst---===//
+//===-- MBlazeMCInstLower.cpp - Convert MBlaze MachineInstr to an MCInst---===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index 4ad7bd6..5ed81dd 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -33,16 +33,16 @@ extern "C" void LLVMInitializeMBlazeTarget() {
// an easier handling.
MBlazeTargetMachine::
MBlazeTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL):
- LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
- Subtarget(TT, CPU, FS),
- DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"),
- InstrInfo(*this),
- FrameLowering(Subtarget),
- TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()) {
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS),
+ DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"),
+ InstrInfo(*this),
+ FrameLowering(Subtarget),
+ TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this),
+ InstrItins(Subtarget.getInstrItineraryData()) {
}
// Install an instruction selector pass using
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h
index 1c1aa53..036f1b6 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.h
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -43,6 +43,7 @@ namespace llvm {
public:
MBlazeTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
diff --git a/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt b/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
index 37871b6..6fa7f43 100644
--- a/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
@@ -5,11 +5,4 @@ add_llvm_library(LLVMMBlazeDesc
MBlazeMCTargetDesc.cpp
)
-add_llvm_library_dependencies(LLVMMBlazeDesc
- LLVMMBlazeAsmPrinter
- LLVMMBlazeInfo
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMMBlazeDesc MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt b/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt
index e89811b..4982f0f 100644
--- a/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MBlazeDesc
parent = MBlaze
required_libraries = MBlazeAsmPrinter MBlazeInfo MC Support
add_to_library_groups = MBlaze
-
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
index 08f7d46..d5acbe9 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
@@ -58,6 +58,11 @@ public:
bool MayNeedRelaxation(const MCInst &Inst) const;
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const;
+
void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
@@ -87,6 +92,18 @@ bool MBlazeAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
return hasExprOrImm;
}
+bool MBlazeAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // FIXME: Is this right? It's what the "generic" code was doing before,
+ // but is X86 specific. Is it actually true for MBlaze also, or was it
+ // just close enough to not be a big deal?
+ //
+ // Relax if the value is too big for a (signed) i8.
+ return int64_t(Value) != int64_t(int8_t(Value));
+}
+
void MBlazeAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
Res = Inst;
Res.setOpcode(getRelaxedOpcode(Inst.getOpcode()));
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h
index 776dbc4..c8bdd6f 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h
@@ -51,6 +51,7 @@ namespace MBlazeII {
FRRRR,
FRI,
FC,
+ FRR,
FormMask = 63
//===------------------------------------------------------------------===//
diff --git a/lib/Target/MBlaze/TargetInfo/CMakeLists.txt b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
index 93fce58..b554d9b 100644
--- a/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
+++ b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
@@ -5,10 +5,4 @@ add_llvm_library(LLVMMBlazeInfo
MBlazeTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMMBlazeInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMMBlazeInfo MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt b/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt
index 938a1d9..ba7ee5d 100644
--- a/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MBlazeInfo
parent = MBlaze
required_libraries = MC Support Target
add_to_library_groups = MBlaze
-
diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt
index 55c2d7d..7daa7a2 100644
--- a/lib/Target/MSP430/CMakeLists.txt
+++ b/lib/Target/MSP430/CMakeLists.txt
@@ -22,19 +22,6 @@ add_llvm_target(MSP430CodeGen
MSP430MCInstLower.cpp
)
-add_llvm_library_dependencies(LLVMMSP430CodeGen
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMMSP430AsmPrinter
- LLVMMSP430Desc
- LLVMMSP430Info
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/MSP430/InstPrinter/CMakeLists.txt b/lib/Target/MSP430/InstPrinter/CMakeLists.txt
index ce39d95..64ac994 100644
--- a/lib/Target/MSP430/InstPrinter/CMakeLists.txt
+++ b/lib/Target/MSP430/InstPrinter/CMakeLists.txt
@@ -4,9 +4,4 @@ add_llvm_library(LLVMMSP430AsmPrinter
MSP430InstPrinter.cpp
)
-add_llvm_library_dependencies(LLVMMSP430AsmPrinter
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMMSP430AsmPrinter MSP430CommonTableGen)
diff --git a/lib/Target/MSP430/InstPrinter/LLVMBuild.txt b/lib/Target/MSP430/InstPrinter/LLVMBuild.txt
index aeb863a..37b8c25 100644
--- a/lib/Target/MSP430/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/MSP430/InstPrinter/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MSP430AsmPrinter
parent = MSP430
required_libraries = MC Support
add_to_library_groups = MSP430
-
diff --git a/lib/Target/MSP430/LLVMBuild.txt b/lib/Target/MSP430/LLVMBuild.txt
index 024312b..51d9702 100644
--- a/lib/Target/MSP430/LLVMBuild.txt
+++ b/lib/Target/MSP430/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = InstPrinter MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = MSP430
@@ -27,4 +30,3 @@ name = MSP430CodeGen
parent = MSP430
required_libraries = AsmPrinter CodeGen Core MC MSP430AsmPrinter MSP430Desc MSP430Info SelectionDAG Support Target
add_to_library_groups = MSP430
-
diff --git a/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt b/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
index c2dd448..adc95c5 100644
--- a/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
@@ -3,12 +3,4 @@ add_llvm_library(LLVMMSP430Desc
MSP430MCAsmInfo.cpp
)
-add_llvm_library_dependencies(LLVMMSP430Desc
- LLVMMC
- LLVMMSP430AsmPrinter
- LLVMMSP430Info
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMMSP430Desc MSP430CommonTableGen)
diff --git a/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt b/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt
index 1890e9d..3319d93 100644
--- a/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MSP430Desc
parent = MSP430
required_libraries = MC MSP430AsmPrinter MSP430Info Support Target
add_to_library_groups = MSP430
-
diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp
index c99f4ab..e406ff2 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -29,7 +29,7 @@ using namespace llvm;
bool MSP430FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return (DisableFramePointerElim(MF) ||
+ return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
MF.getFrameInfo()->hasVarSizedObjects() ||
MFI->isFrameAddressTaken());
}
@@ -140,7 +140,7 @@ void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
while (MBBI != MBB.begin()) {
MachineBasicBlock::iterator PI = prior(MBBI);
unsigned Opc = PI->getOpcode();
- if (Opc != MSP430::POP16r && !PI->getDesc().isTerminator())
+ if (Opc != MSP430::POP16r && !PI->isTerminator())
break;
--MBBI;
}
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 5c94137..884d69b 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -122,8 +122,12 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
setOperationAction(ISD::CTTZ, MVT::i8, Expand);
setOperationAction(ISD::CTTZ, MVT::i16, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
setOperationAction(ISD::CTLZ, MVT::i8, Expand);
setOperationAction(ISD::CTLZ, MVT::i16, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand);
setOperationAction(ISD::CTPOP, MVT::i8, Expand);
setOperationAction(ISD::CTPOP, MVT::i16, Expand);
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index 81f766e..9d3c7e9 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -158,13 +158,12 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
}
bool MSP430InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isTerminator()) return false;
+ if (!MI->isTerminator()) return false;
// Conditional branch is a special case.
- if (MCID.isBranch() && !MCID.isBarrier())
+ if (MI->isBranch() && !MI->isBarrier())
return true;
- if (!MCID.isPredicable())
+ if (!MI->isPredicable())
return true;
return !isPredicated(MI);
}
@@ -189,7 +188,7 @@ bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// A terminator that isn't a branch can't easily be handled
// by this analysis.
- if (!I->getDesc().isBranch())
+ if (!I->isBranch())
return true;
// Cannot handle indirect branches.
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index fe185fb..a0fc3da 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -28,9 +28,10 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T,
StringRef TT,
StringRef CPU,
StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS),
// FIXME: Check TargetData string.
DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index 4fb060f..28d482a 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -39,7 +39,7 @@ class MSP430TargetMachine : public LLVMTargetMachine {
public:
MSP430TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
diff --git a/lib/Target/MSP430/TargetInfo/CMakeLists.txt b/lib/Target/MSP430/TargetInfo/CMakeLists.txt
index 1526946..f6b40ea 100644
--- a/lib/Target/MSP430/TargetInfo/CMakeLists.txt
+++ b/lib/Target/MSP430/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMMSP430Info
MSP430TargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMMSP430Info
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMMSP430Info MSP430CommonTableGen)
diff --git a/lib/Target/MSP430/TargetInfo/LLVMBuild.txt b/lib/Target/MSP430/TargetInfo/LLVMBuild.txt
index a745ea8..deafc2d 100644
--- a/lib/Target/MSP430/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/MSP430/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MSP430Info
parent = MSP430
required_libraries = MC Support Target
add_to_library_groups = MSP430
-
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index ac9cfc0..a13c0e8 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -29,19 +29,6 @@ add_llvm_target(MipsCodeGen
MipsSelectionDAGInfo.cpp
)
-add_llvm_library_dependencies(LLVMMipsCodeGen
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMMipsAsmPrinter
- LLVMMipsDesc
- LLVMMipsInfo
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Mips/InstPrinter/CMakeLists.txt b/lib/Target/Mips/InstPrinter/CMakeLists.txt
index c45b35d..3e9fbf1 100644
--- a/lib/Target/Mips/InstPrinter/CMakeLists.txt
+++ b/lib/Target/Mips/InstPrinter/CMakeLists.txt
@@ -4,9 +4,4 @@ add_llvm_library(LLVMMipsAsmPrinter
MipsInstPrinter.cpp
)
-add_llvm_library_dependencies(LLVMMipsAsmPrinter
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMMipsAsmPrinter MipsCommonTableGen)
diff --git a/lib/Target/Mips/InstPrinter/LLVMBuild.txt b/lib/Target/Mips/InstPrinter/LLVMBuild.txt
index d953a61..317057b 100644
--- a/lib/Target/Mips/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/Mips/InstPrinter/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MipsAsmPrinter
parent = Mips
required_libraries = MC Support
add_to_library_groups = Mips
-
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index f544d39..3e9c46a 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -96,10 +96,14 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) {
case MCSymbolRefExpr::VK_None: break;
case MCSymbolRefExpr::VK_Mips_GPREL: OS << "%gp_rel("; break;
case MCSymbolRefExpr::VK_Mips_GOT_CALL: OS << "%call16("; break;
+ case MCSymbolRefExpr::VK_Mips_GOT16: OS << "%got("; break;
case MCSymbolRefExpr::VK_Mips_GOT: OS << "%got("; break;
case MCSymbolRefExpr::VK_Mips_ABS_HI: OS << "%hi("; break;
case MCSymbolRefExpr::VK_Mips_ABS_LO: OS << "%lo("; break;
case MCSymbolRefExpr::VK_Mips_TLSGD: OS << "%tlsgd("; break;
+ case MCSymbolRefExpr::VK_Mips_TLSLDM: OS << "%tlsldm("; break;
+ case MCSymbolRefExpr::VK_Mips_DTPREL_HI:OS << "%dtprel_hi("; break;
+ case MCSymbolRefExpr::VK_Mips_DTPREL_LO:OS << "%dtprel_lo("; break;
case MCSymbolRefExpr::VK_Mips_GOTTPREL: OS << "%gottprel("; break;
case MCSymbolRefExpr::VK_Mips_TPREL_HI: OS << "%tprel_hi("; break;
case MCSymbolRefExpr::VK_Mips_TPREL_LO: OS << "%tprel_lo("; break;
diff --git a/lib/Target/Mips/LLVMBuild.txt b/lib/Target/Mips/LLVMBuild.txt
index e733b52..bcd32bc 100644
--- a/lib/Target/Mips/LLVMBuild.txt
+++ b/lib/Target/Mips/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = InstPrinter MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = Mips
@@ -28,4 +31,3 @@ name = MipsCodeGen
parent = Mips
required_libraries = AsmPrinter CodeGen Core MC MipsAsmPrinter MipsDesc MipsInfo SelectionDAG Support Target
add_to_library_groups = Mips
-
diff --git a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
index 2ceb5c9..0eb0a55 100644
--- a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
@@ -5,11 +5,4 @@ add_llvm_library(LLVMMipsDesc
MipsMCTargetDesc.cpp
)
-add_llvm_library_dependencies(LLVMMipsDesc
- LLVMMC
- LLVMMipsAsmPrinter
- LLVMMipsInfo
- LLVMSupport
- )
-
add_dependencies(LLVMMipsDesc MipsCommonTableGen)
diff --git a/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt b/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt
index d6f5dd2..29f5da6 100644
--- a/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MipsDesc
parent = Mips
required_libraries = MC MipsAsmPrinter MipsInfo Support
add_to_library_groups = Mips
-
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 7bc5fe4..60ff4fe 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -29,13 +29,19 @@
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
+// Prepare value for the target space for it
static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
// Add/subtract and shift
switch (Kind) {
default:
+ return 0;
+ case FK_GPRel_4:
+ case FK_Data_4:
+ case Mips::fixup_Mips_LO16:
break;
case Mips::fixup_Mips_PC16:
// So far we are only using this type for branches.
@@ -52,25 +58,10 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
// address range.
Value >>= 2;
break;
- }
-
- // Mask off value for placement as an operand
- switch (Kind) {
- default:
- break;
- case FK_GPRel_4:
- case FK_Data_4:
- Value &= 0xffffffff;
- break;
- case Mips::fixup_Mips_26:
- Value &= 0x03ffffff;
- break;
- case Mips::fixup_Mips_LO16:
- case Mips::fixup_Mips_PC16:
- Value &= 0x0000ffff;
- break;
case Mips::fixup_Mips_HI16:
- Value >>= 16;
+ case Mips::fixup_Mips_GOT_Local:
+ // Get the higher 16-bits. Also add 1 if bit 15 is 1.
+ Value = (Value >> 16) + ((Value & 0x8000) != 0);
break;
}
@@ -96,42 +87,40 @@ public:
/// fixup kind as appropriate.
void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const {
- unsigned Kind = (unsigned)Fixup.getKind();
- Value = adjustFixupValue(Kind, Value);
+ MCFixupKind Kind = Fixup.getKind();
+ Value = adjustFixupValue((unsigned)Kind, Value);
if (!Value)
- return; // Doesn't change encoding.
+ return; // Doesn't change encoding.
unsigned Offset = Fixup.getOffset();
- switch (Kind) {
- default:
- llvm_unreachable("Unknown fixup kind!");
- case Mips::fixup_Mips_GOT16: // This will be fixed up at link time
- break;
- case FK_GPRel_4:
- case FK_Data_4:
- case Mips::fixup_Mips_26:
- case Mips::fixup_Mips_LO16:
- case Mips::fixup_Mips_PC16:
- case Mips::fixup_Mips_HI16:
- // For each byte of the fragment that the fixup touches, mask i
- // the fixup value. The Value has been "split up" into the appr
- // bitfields above.
- for (unsigned i = 0; i != 4; ++i) // FIXME - Need to support 2 and 8 bytes
- Data[Offset + i] += uint8_t((Value >> (i * 8)) & 0xff);
- break;
+ // FIXME: The below code will not work across endian models
+ // How many bytes/bits are we fixing up?
+ unsigned NumBytes = ((getFixupKindInfo(Kind).TargetSize-1)/8)+1;
+ uint64_t Mask = ((uint64_t)1 << getFixupKindInfo(Kind).TargetSize) - 1;
+
+ // Grab current value, if any, from bits.
+ uint64_t CurVal = 0;
+ for (unsigned i = 0; i != NumBytes; ++i)
+ CurVal |= ((uint8_t)Data[Offset + i]) << (i * 8);
+
+ CurVal = (CurVal & ~Mask) | ((CurVal + Value) & Mask);
+
+ // Write out the bytes back to the code/data bits.
+ // First the unaffected bits and then the fixup.
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ Data[Offset + i] = uint8_t((CurVal >> (i * 8)) & 0xff);
}
- }
+}
unsigned getNumFixupKinds() const { return Mips::NumTargetFixupKinds; }
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
const static MCFixupKindInfo Infos[Mips::NumTargetFixupKinds] = {
- // This table *must* be in the order that the fixup_* kinds a
+ // This table *must* be in the same order as the fixup_* kinds in
// MipsFixupKinds.h.
//
// name offset bits flags
- { "fixup_Mips_NONE", 0, 0, 0 },
{ "fixup_Mips_16", 0, 16, 0 },
{ "fixup_Mips_32", 0, 32, 0 },
{ "fixup_Mips_REL32", 0, 32, 0 },
@@ -140,7 +129,8 @@ public:
{ "fixup_Mips_LO16", 0, 16, 0 },
{ "fixup_Mips_GPREL16", 0, 16, 0 },
{ "fixup_Mips_LITERAL", 0, 16, 0 },
- { "fixup_Mips_GOT16", 0, 16, 0 },
+ { "fixup_Mips_GOT_Global", 0, 16, 0 },
+ { "fixup_Mips_GOT_Local", 0, 16, 0 },
{ "fixup_Mips_PC16", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_Mips_CALL16", 0, 16, 0 },
{ "fixup_Mips_GPREL32", 0, 32, 0 },
@@ -173,6 +163,17 @@ public:
return false;
}
+ /// fixupNeedsRelaxation - Target specific predicate for whether a given
+ /// fixup requires the associated instruction to be relaxed.
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // FIXME.
+ assert(0 && "RelaxInstruction() unimplemented");
+ return false;
+ }
+
/// RelaxInstruction - Relax the instruction in the given fragment
/// to the next wider instruction.
///
diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index cebfde0..00fc5df 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -31,8 +31,9 @@ namespace MipsII {
MO_NO_FLAG,
- /// MO_GOT - Represents the offset into the global offset table at which
+ /// MO_GOT16 - Represents the offset into the global offset table at which
/// the address the relocation entry symbol resides during execution.
+ MO_GOT16,
MO_GOT,
/// MO_GOT_CALL - Represents the offset into the global offset table at
@@ -55,6 +56,13 @@ namespace MipsII {
// Dynamic TLS).
MO_TLSGD,
+ /// MO_TLSLDM - Represents the offset into the global offset table at which
+ // the module ID and TLS block offset reside during execution (Local
+ // Dynamic TLS).
+ MO_TLSLDM,
+ MO_DTPREL_HI,
+ MO_DTPREL_LO,
+
/// MO_GOTTPREL - Represents the offset from the thread pointer (Initial
// Exec TLS).
MO_GOTTPREL,
@@ -180,6 +188,7 @@ inline static unsigned getMipsRegisterNumbering(unsigned RegEnum)
case Mips::D14:
return 28;
case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64:
+ case Mips::HWR29:
return 29;
case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64:
case Mips::D15:
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index 20890ed..a56c002 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -14,74 +14,82 @@
namespace llvm {
namespace Mips {
- enum Fixups {
- // fixup_Mips_xxx - R_MIPS_NONE
- fixup_Mips_NONE = FirstTargetFixupKind,
+ // Although most of the current fixup types reflect a unique relocation,
+ // one can have multiple fixup types for a given relocation, and thus they
+ // need to be uniquely named.
+ //
+ // This table *must* be in the same order as
+ // MCFixupKindInfo Infos[Mips::NumTargetFixupKinds]
+ // in MipsAsmBackend.cpp.
+ //
+ enum Fixups {
+ // Branch fixups resulting in R_MIPS_16.
+ fixup_Mips_16 = FirstTargetFixupKind,
- // fixup_Mips_xxx - R_MIPS_16.
- fixup_Mips_16,
+ // Pure 32 bit data fixup resulting in - R_MIPS_32.
+ fixup_Mips_32,
- // fixup_Mips_xxx - R_MIPS_32.
- fixup_Mips_32,
+ // Full 32 bit data relative data fixup resulting in - R_MIPS_REL32.
+ fixup_Mips_REL32,
- // fixup_Mips_xxx - R_MIPS_REL32.
- fixup_Mips_REL32,
+ // Jump 26 bit fixup resulting in - R_MIPS_26.
+ fixup_Mips_26,
- // fixup_Mips_xxx - R_MIPS_26.
- fixup_Mips_26,
+ // Pure upper 16 bit fixup resulting in - R_MIPS_HI16.
+ fixup_Mips_HI16,
- // fixup_Mips_xxx - R_MIPS_HI16.
- fixup_Mips_HI16,
+ // Pure lower 16 bit fixup resulting in - R_MIPS_LO16.
+ fixup_Mips_LO16,
- // fixup_Mips_xxx - R_MIPS_LO16.
- fixup_Mips_LO16,
+ // 16 bit fixup for GP offset resulting in - R_MIPS_GPREL16.
+ fixup_Mips_GPREL16,
- // fixup_Mips_xxx - R_MIPS_GPREL16.
- fixup_Mips_GPREL16,
+ // 16 bit literal fixup resulting in - R_MIPS_LITERAL.
+ fixup_Mips_LITERAL,
- // fixup_Mips_xxx - R_MIPS_LITERAL.
- fixup_Mips_LITERAL,
+ // Global symbol fixup resulting in - R_MIPS_GOT16.
+ fixup_Mips_GOT_Global,
- // fixup_Mips_xxx - R_MIPS_GOT16.
- fixup_Mips_GOT16,
+ // Local symbol fixup resulting in - R_MIPS_GOT16.
+ fixup_Mips_GOT_Local,
- // fixup_Mips_xxx - R_MIPS_PC16.
- fixup_Mips_PC16,
+ // PC relative branch fixup resulting in - R_MIPS_PC16.
+ fixup_Mips_PC16,
- // fixup_Mips_xxx - R_MIPS_CALL16.
- fixup_Mips_CALL16,
+ // resulting in - R_MIPS_CALL16.
+ fixup_Mips_CALL16,
- // fixup_Mips_xxx - R_MIPS_GPREL32.
- fixup_Mips_GPREL32,
+ // resulting in - R_MIPS_GPREL32.
+ fixup_Mips_GPREL32,
- // fixup_Mips_xxx - R_MIPS_SHIFT5.
- fixup_Mips_SHIFT5,
+ // resulting in - R_MIPS_SHIFT5.
+ fixup_Mips_SHIFT5,
- // fixup_Mips_xxx - R_MIPS_SHIFT6.
- fixup_Mips_SHIFT6,
+ // resulting in - R_MIPS_SHIFT6.
+ fixup_Mips_SHIFT6,
- // fixup_Mips_xxx - R_MIPS_64.
- fixup_Mips_64,
+ // Pure 64 bit data fixup resulting in - R_MIPS_64.
+ fixup_Mips_64,
- // fixup_Mips_xxx - R_MIPS_TLS_GD.
- fixup_Mips_TLSGD,
+ // resulting in - R_MIPS_TLS_GD.
+ fixup_Mips_TLSGD,
- // fixup_Mips_xxx - R_MIPS_TLS_GOTTPREL.
- fixup_Mips_GOTTPREL,
+ // resulting in - R_MIPS_TLS_GOTTPREL.
+ fixup_Mips_GOTTPREL,
- // fixup_Mips_xxx - R_MIPS_TLS_TPREL_HI16.
- fixup_Mips_TPREL_HI,
+ // resulting in - R_MIPS_TLS_TPREL_HI16.
+ fixup_Mips_TPREL_HI,
- // fixup_Mips_xxx - R_MIPS_TLS_TPREL_LO16.
- fixup_Mips_TPREL_LO,
+ // resulting in - R_MIPS_TLS_TPREL_LO16.
+ fixup_Mips_TPREL_LO,
- // fixup_Mips_xxx - yyy. // This should become R_MIPS_PC16
- fixup_Mips_Branch_PCRel,
+ // PC relative branch fixup resulting in - R_MIPS_PC16
+ fixup_Mips_Branch_PCRel,
- // Marker
- LastTargetFixupKind,
- NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
- };
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+ };
} // namespace Mips
} // namespace llvm
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 0c3cbb3..463dcfe 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -194,8 +194,11 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
case MCSymbolRefExpr::VK_Mips_GOT_CALL:
FixupKind = Mips::fixup_Mips_CALL16;
break;
+ case MCSymbolRefExpr::VK_Mips_GOT16:
+ FixupKind = Mips::fixup_Mips_GOT_Global;
+ break;
case MCSymbolRefExpr::VK_Mips_GOT:
- FixupKind = Mips::fixup_Mips_GOT16;
+ FixupKind = Mips::fixup_Mips_GOT_Local;
break;
case MCSymbolRefExpr::VK_Mips_ABS_HI:
FixupKind = Mips::fixup_Mips_HI16;
@@ -245,8 +248,8 @@ unsigned
MipsMCCodeEmitter::getSizeExtEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
assert(MI.getOperand(OpNo).isImm());
- unsigned szEncoding = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups);
- return szEncoding - 1;
+ unsigned SizeEncoding = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups);
+ return SizeEncoding - 1;
}
// FIXME: should be called getMSBEncoding
@@ -256,10 +259,10 @@ MipsMCCodeEmitter::getSizeInsEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
assert(MI.getOperand(OpNo-1).isImm());
assert(MI.getOperand(OpNo).isImm());
- unsigned pos = getMachineOpValue(MI, MI.getOperand(OpNo-1), Fixups);
- unsigned sz = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups);
+ unsigned Position = getMachineOpValue(MI, MI.getOperand(OpNo-1), Fixups);
+ unsigned Size = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups);
- return pos + sz - 1;
+ return Position + Size - 1;
}
#include "MipsGenMCCodeEmitter.inc"
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 39c2c16..e9e0f60 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -79,9 +79,9 @@ def FeatureMips64r2 : SubtargetFeature<"mips64r2", "MipsArchVersion",
class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, MipsGenericItineraries, Features>;
-def : Proc<"mips32r1", [FeatureMips32]>;
-def : Proc<"4ke", [FeatureMips32r2]>;
-def : Proc<"mips64r1", [FeatureMips64]>;
+def : Proc<"mips32", [FeatureMips32]>;
+def : Proc<"mips32r2", [FeatureMips32r2]>;
+def : Proc<"mips64", [FeatureMips64]>;
def : Proc<"mips64r2", [FeatureMips64r2]>;
def MipsAsmWriter : AsmWriter {
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index b0fb4fa..2996986 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -25,7 +25,7 @@ def uimm16_64 : Operand<i64> {
// Transformation Function - get Imm - 32.
def Subtract32 : SDNodeXForm<imm, [{
- return getI32Imm((unsigned)N->getZExtValue() - 32);
+ return getImm(N, (unsigned)N->getZExtValue() - 32);
}]>;
// shamt field must fit in 5 bits.
@@ -36,6 +36,19 @@ def imm32_63 : ImmLeaf<i32,
[{return (int32_t)Imm >= 32 && (int32_t)Imm < 64;}],
Subtract32>;
+// Is a 32-bit int.
+def immSExt32 : ImmLeaf<i64, [{return isInt<32>(Imm);}]>;
+
+// Transformation Function - get the higher 16 bits.
+def HIGHER : SDNodeXForm<imm, [{
+ return getImm(N, (N->getZExtValue() >> 32) & 0xFFFF);
+}]>;
+
+// Transformation Function - get the highest 16 bits.
+def HIGHEST : SDNodeXForm<imm, [{
+ return getImm(N, (N->getZExtValue() >> 48) & 0xFFFF);
+}]>;
+
//===----------------------------------------------------------------------===//
// Instructions specific format
//===----------------------------------------------------------------------===//
@@ -206,6 +219,17 @@ let Uses = [SP_64] in
def DynAlloc64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>,
Requires<[IsN64]>;
+def RDHWR64 : ReadHardware<CPU64Regs, HWRegs64>;
+
+def DEXT : ExtBase<3, "dext", CPU64Regs>;
+def DINS : InsBase<7, "dins", CPU64Regs>;
+
+def DSLL64_32 : FR<0x3c, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
+ "dsll32\t$rd, $rt, 0", [], IIAlu>;
+
+def SLL64_32 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
+ "sll\t$rd, $rt, 0", [], IIAlu>;
+
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
//===----------------------------------------------------------------------===//
@@ -216,9 +240,15 @@ def : Pat<(i64 immSExt16:$in),
def : Pat<(i64 immZExt16:$in),
(ORi64 ZERO_64, imm:$in)>;
+// 32-bit immediates
+def : Pat<(i64 immSExt32:$imm),
+ (ORi64 (LUi64 (HI16 imm:$imm)), (LO16 imm:$imm))>;
+
// Arbitrary immediates
def : Pat<(i64 imm:$imm),
- (ORi64 (LUi64 (HI16 imm:$imm)), (LO16 imm:$imm))>;
+ (ORi64 (DSLL (ORi64 (DSLL (ORi64 (LUi64 (HIGHEST imm:$imm)),
+ (HIGHER imm:$imm)), 16), (HI16 imm:$imm)), 16),
+ (LO16 imm:$imm))>;
// extended loads
let Predicates = [NotN64] in {
@@ -236,11 +266,13 @@ def : Pat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>;
def : Pat<(MipsHi tblockaddress:$in), (LUi64 tblockaddress:$in)>;
def : Pat<(MipsHi tjumptable:$in), (LUi64 tjumptable:$in)>;
def : Pat<(MipsHi tconstpool:$in), (LUi64 tconstpool:$in)>;
+def : Pat<(MipsHi tglobaltlsaddr:$in), (LUi64 tglobaltlsaddr:$in)>;
def : Pat<(MipsLo tglobaladdr:$in), (DADDiu ZERO_64, tglobaladdr:$in)>;
def : Pat<(MipsLo tblockaddress:$in), (DADDiu ZERO_64, tblockaddress:$in)>;
def : Pat<(MipsLo tjumptable:$in), (DADDiu ZERO_64, tjumptable:$in)>;
def : Pat<(MipsLo tconstpool:$in), (DADDiu ZERO_64, tconstpool:$in)>;
+def : Pat<(MipsLo tglobaltlsaddr:$in), (DADDiu ZERO_64, tglobaltlsaddr:$in)>;
def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaladdr:$lo)),
(DADDiu CPU64Regs:$hi, tglobaladdr:$lo)>;
@@ -250,6 +282,15 @@ def : Pat<(add CPU64Regs:$hi, (MipsLo tjumptable:$lo)),
(DADDiu CPU64Regs:$hi, tjumptable:$lo)>;
def : Pat<(add CPU64Regs:$hi, (MipsLo tconstpool:$lo)),
(DADDiu CPU64Regs:$hi, tconstpool:$lo)>;
+def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaltlsaddr:$lo)),
+ (DADDiu CPU64Regs:$hi, tglobaltlsaddr:$lo)>;
+
+def : WrapperPat<tglobaladdr, DADDiu, GP_64>;
+def : WrapperPat<tconstpool, DADDiu, GP_64>;
+def : WrapperPat<texternalsym, DADDiu, GP_64>;
+def : WrapperPat<tblockaddress, DADDiu, GP_64>;
+def : WrapperPat<tjumptable, DADDiu, GP_64>;
+def : WrapperPat<tglobaltlsaddr, DADDiu, GP_64>;
defm : BrcondPats<CPU64Regs, BEQ64, BNE64, SLT64, SLTu64, SLTi64, SLTiu64,
ZERO_64>;
@@ -268,3 +309,6 @@ def : Pat<(MipsDynAlloc addr:$f), (DynAlloc64 addr:$f)>, Requires<[IsN64]>;
def : Pat<(i32 (trunc CPU64Regs:$src)),
(SLL (EXTRACT_SUBREG CPU64Regs:$src, sub_32), 0)>, Requires<[IsN64]>;
+// 32-to-64-bit extension
+def : Pat<(i64 (anyext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
+def : Pat<(i64 (zext CPURegs:$src)), (DSRL32 (DSLL64_32 CPURegs:$src), 0)>;
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index d27e3ab..a5505d3 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -96,19 +96,17 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (!OutStreamer.hasRawTextSupport()) {
// Lower CPLOAD and CPRESTORE
- if (Opc == Mips::CPLOAD) {
+ if (Opc == Mips::CPLOAD)
MCInstLowering.LowerCPLOAD(MI, MCInsts);
- for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); I
- != MCInsts.end(); ++I)
+ else if (Opc == Mips::CPRESTORE)
+ MCInstLowering.LowerCPRESTORE(MI, MCInsts);
+
+ if (!MCInsts.empty()) {
+ for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin();
+ I != MCInsts.end(); ++I)
OutStreamer.EmitInstruction(*I);
return;
}
-
- if (Opc == Mips::CPRESTORE) {
- MCInstLowering.LowerCPRESTORE(MI, TmpInst0);
- OutStreamer.EmitInstruction(TmpInst0);
- return;
- }
}
OutStreamer.EmitInstruction(TmpInst0);
@@ -317,9 +315,9 @@ bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
// Otherwise, check the last instruction.
// Check if the last terminator is an unconditional branch.
MachineBasicBlock::const_iterator I = Pred->end();
- while (I != Pred->begin() && !(--I)->getDesc().isTerminator()) ;
+ while (I != Pred->begin() && !(--I)->isTerminator()) ;
- return !I->getDesc().isBarrier();
+ return !I->isBarrier();
}
// Print out an operand for an inline asm expression.
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp
index a8f29ae..6b26e24 100644
--- a/lib/Target/Mips/MipsCodeEmitter.cpp
+++ b/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -144,7 +144,7 @@ bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB){
MCE.StartMachineBasicBlock(MBB);
- for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
I != E; ++I)
emitInstruction(*I);
}
@@ -161,7 +161,7 @@ unsigned MipsCodeEmitter::getRelocation(const MachineInstr &MI,
if (Form == MipsII::FrmJ)
return Mips::reloc_mips_26;
if ((Form == MipsII::FrmI || Form == MipsII::FrmFI)
- && MI.getDesc().isBranch())
+ && MI.isBranch())
return Mips::reloc_mips_branch;
if (Form == MipsII::FrmI && MI.getOpcode() == Mips::LUi)
return Mips::reloc_mips_hi;
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index be3b7a0..1d9e9b0 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -96,7 +96,7 @@ runOnMachineBasicBlock(MachineBasicBlock &MBB) {
LastFiller = MBB.end();
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
- if (I->getDesc().hasDelaySlot()) {
+ if (I->hasDelaySlot()) {
++FilledSlots;
Changed = true;
@@ -146,7 +146,7 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB,
|| I->isInlineAsm()
|| I->isLabel()
|| FI == LastFiller
- || I->getDesc().isPseudo()
+ || I->isPseudo()
//
// Should not allow:
// ERET, DERET or WAIT, PAUSE. Need to add these to instruction
@@ -174,16 +174,15 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
if (candidate->isImplicitDef() || candidate->isKill())
return true;
- MCInstrDesc MCID = candidate->getDesc();
// Loads or stores cannot be moved past a store to the delay slot
// and stores cannot be moved past a load.
- if (MCID.mayLoad()) {
+ if (candidate->mayLoad()) {
if (sawStore)
return true;
sawLoad = true;
}
- if (MCID.mayStore()) {
+ if (candidate->mayStore()) {
if (sawStore)
return true;
sawStore = true;
@@ -191,7 +190,7 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
return true;
}
- assert((!MCID.isCall() && !MCID.isReturn()) &&
+ assert((!candidate->isCall() && !candidate->isReturn()) &&
"Cannot put calls or returns in delay slot.");
for (unsigned i = 0, e = candidate->getNumOperands(); i!= e; ++i) {
@@ -221,11 +220,11 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
SmallSet<unsigned, 32>& RegUses) {
// If MI is a call or return, just examine the explicit non-variadic operands.
MCInstrDesc MCID = MI->getDesc();
- unsigned e = MCID.isCall() || MCID.isReturn() ? MCID.getNumOperands() :
- MI->getNumOperands();
+ unsigned e = MI->isCall() || MI->isReturn() ? MCID.getNumOperands() :
+ MI->getNumOperands();
// Add RA to RegDefs to prevent users of RA from going into delay slot.
- if (MCID.isCall())
+ if (MI->isCall())
RegDefs.insert(Mips::RA);
for (unsigned i = 0; i != e; ++i) {
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index 36aef99..2466545 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -85,8 +85,8 @@ using namespace llvm;
// if frame pointer elimination is disabled.
bool MipsFrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()
- || MFI->isFrameAddressTaken();
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken();
}
bool MipsFrameLowering::targetHandlesStackFrameRounding() const {
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 9c831ed..b17239d 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -86,10 +86,9 @@ private:
// Complex Pattern.
bool SelectAddr(SDValue N, SDValue &Base, SDValue &Offset);
- // getI32Imm - Return a target constant with the specified
- // value, of type i32.
- inline SDValue getI32Imm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
+ // getImm - Return a target constant with the specified value.
+ inline SDValue getImm(const SDNode *Node, unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
}
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
@@ -122,21 +121,16 @@ SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
}
// on PIC code Load GA
- if (TM.getRelocationModel() == Reloc::PIC_) {
- if (Addr.getOpcode() == MipsISD::WrapperPIC) {
- Base = CurDAG->getRegister(GPReg, ValTy);
- Offset = Addr.getOperand(0);
- return true;
- }
- } else {
+ if (Addr.getOpcode() == MipsISD::Wrapper) {
+ Base = CurDAG->getRegister(GPReg, ValTy);
+ Offset = Addr.getOperand(0);
+ return true;
+ }
+
+ if (TM.getRelocationModel() != Reloc::PIC_) {
if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
Addr.getOpcode() == ISD::TargetGlobalAddress))
return false;
- else if (Addr.getOpcode() == ISD::TargetGlobalTLSAddress) {
- Base = CurDAG->getRegister(GPReg, ValTy);
- Offset = Addr;
- return true;
- }
}
// Addresses of the form FI+const or FI|const
@@ -310,13 +304,24 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
}
case MipsISD::ThreadPointer: {
- unsigned SrcReg = Mips::HWR29;
- unsigned DestReg = Mips::V1;
- SDNode *Rdhwr = CurDAG->getMachineNode(Mips::RDHWR, Node->getDebugLoc(),
- Node->getValueType(0), CurDAG->getRegister(SrcReg, MVT::i32));
+ EVT PtrVT = TLI.getPointerTy();
+ unsigned RdhwrOpc, SrcReg, DestReg;
+
+ if (PtrVT == MVT::i32) {
+ RdhwrOpc = Mips::RDHWR;
+ SrcReg = Mips::HWR29;
+ DestReg = Mips::V1;
+ } else {
+ RdhwrOpc = Mips::RDHWR64;
+ SrcReg = Mips::HWR29_64;
+ DestReg = Mips::V1_64;
+ }
+
+ SDNode *Rdhwr = CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(),
+ Node->getValueType(0), CurDAG->getRegister(SrcReg, PtrVT));
SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, DestReg,
SDValue(Rdhwr, 0));
- SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, MVT::i32);
+ SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, PtrVT);
ReplaceUses(SDValue(Node, 0), ResNode);
return ResNode.getNode();
}
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index b5a15cf..c9b657c 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -40,11 +40,11 @@ using namespace llvm;
// mask (Pos), and return true.
// For example, if I is 0x003ff800, (Pos, Size) = (11, 11).
static bool IsShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
- if (!isUInt<32>(I) || !isShiftedMask_32(I))
+ if (!isShiftedMask_64(I))
return false;
- Size = CountPopulation_32(I);
- Pos = CountTrailingZeros_32(I);
+ Size = CountPopulation_64(I);
+ Pos = CountTrailingZeros_64(I);
return true;
}
@@ -54,9 +54,6 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::Hi: return "MipsISD::Hi";
case MipsISD::Lo: return "MipsISD::Lo";
case MipsISD::GPRel: return "MipsISD::GPRel";
- case MipsISD::TlsGd: return "MipsISD::TlsGd";
- case MipsISD::TprelHi: return "MipsISD::TprelHi";
- case MipsISD::TprelLo: return "MipsISD::TprelLo";
case MipsISD::ThreadPointer: return "MipsISD::ThreadPointer";
case MipsISD::Ret: return "MipsISD::Ret";
case MipsISD::FPBrcond: return "MipsISD::FPBrcond";
@@ -72,7 +69,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::DivRemU: return "MipsISD::DivRemU";
case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64";
case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64";
- case MipsISD::WrapperPIC: return "MipsISD::WrapperPIC";
+ case MipsISD::Wrapper: return "MipsISD::Wrapper";
case MipsISD::DynAlloc: return "MipsISD::DynAlloc";
case MipsISD::Sync: return "MipsISD::Sync";
case MipsISD::Ext: return "MipsISD::Ext";
@@ -129,7 +126,9 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
@@ -157,6 +156,10 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i64, Expand);
@@ -555,20 +558,20 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG,
return SDValue();
SDValue ShiftRight = N->getOperand(0), Mask = N->getOperand(1);
-
+ unsigned ShiftRightOpc = ShiftRight.getOpcode();
+
// Op's first operand must be a shift right.
- if (ShiftRight.getOpcode() != ISD::SRA && ShiftRight.getOpcode() != ISD::SRL)
+ if (ShiftRightOpc != ISD::SRA && ShiftRightOpc != ISD::SRL)
return SDValue();
// The second operand of the shift must be an immediate.
- uint64_t Pos;
ConstantSDNode *CN;
if (!(CN = dyn_cast<ConstantSDNode>(ShiftRight.getOperand(1))))
return SDValue();
- Pos = CN->getZExtValue();
-
+ uint64_t Pos = CN->getZExtValue();
uint64_t SMPos, SMSize;
+
// Op's second operand must be a shifted mask.
if (!(CN = dyn_cast<ConstantSDNode>(Mask)) ||
!IsShiftedMask(CN->getZExtValue(), SMPos, SMSize))
@@ -576,10 +579,11 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG,
// Return if the shifted mask does not start at bit 0 or the sum of its size
// and Pos exceeds the word's size.
- if (SMPos != 0 || Pos + SMSize > 32)
+ EVT ValTy = N->getValueType(0);
+ if (SMPos != 0 || Pos + SMSize > ValTy.getSizeInBits())
return SDValue();
- return DAG.getNode(MipsISD::Ext, N->getDebugLoc(), MVT::i32,
+ return DAG.getNode(MipsISD::Ext, N->getDebugLoc(), ValTy,
ShiftRight.getOperand(0),
DAG.getConstant(Pos, MVT::i32),
DAG.getConstant(SMSize, MVT::i32));
@@ -630,10 +634,11 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG,
// Return if the shift amount and the first bit position of mask are not the
// same.
- if (Shamt != SMPos0)
+ EVT ValTy = N->getValueType(0);
+ if ((Shamt != SMPos0) || (SMPos0 + SMSize0 > ValTy.getSizeInBits()))
return SDValue();
- return DAG.getNode(MipsISD::Ins, N->getDebugLoc(), MVT::i32,
+ return DAG.getNode(MipsISD::Ins, N->getDebugLoc(), ValTy,
Shl.getOperand(0),
DAG.getConstant(SMPos0, MVT::i32),
DAG.getConstant(SMSize0, MVT::i32),
@@ -1485,9 +1490,9 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
(GV->hasLocalLinkage() && !isa<Function>(GV)));
unsigned GotFlag = IsN64 ?
(HasGotOfst ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT_DISP) :
- MipsII::MO_GOT;
+ (HasGotOfst ? MipsII::MO_GOT : MipsII::MO_GOT16);
SDValue GA = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0, GotFlag);
- GA = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, GA);
+ GA = DAG.getNode(MipsISD::Wrapper, dl, ValTy, GA);
SDValue ResNode = DAG.getLoad(ValTy, dl,
DAG.getEntryNode(), GA, MachinePointerInfo(),
false, false, false, 0);
@@ -1523,7 +1528,7 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op,
unsigned GOTFlag = IsN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
unsigned OFSTFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
SDValue BAGOTOffset = DAG.getBlockAddress(BA, ValTy, true, GOTFlag);
- BAGOTOffset = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, BAGOTOffset);
+ BAGOTOffset = DAG.getNode(MipsISD::Wrapper, dl, ValTy, BAGOTOffset);
SDValue BALOOffset = DAG.getBlockAddress(BA, ValTy, true, OFSTFlag);
SDValue Load = DAG.getLoad(ValTy, dl,
DAG.getEntryNode(), BAGOTOffset,
@@ -1535,9 +1540,9 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op,
SDValue MipsTargetLowering::
LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
{
- // If the relocation model is PIC, use the General Dynamic TLS Model,
- // otherwise use the Initial Exec or Local Exec TLS Model.
- // TODO: implement Local Dynamic TLS model
+ // If the relocation model is PIC, use the General Dynamic TLS Model or
+ // Local Dynamic TLS model, otherwise use the Initial Exec or
+ // Local Exec TLS Model.
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
DebugLoc dl = GA->getDebugLoc();
@@ -1546,45 +1551,59 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
// General Dynamic TLS Model
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32,
- 0, MipsII::MO_TLSGD);
- SDValue Tlsgd = DAG.getNode(MipsISD::TlsGd, dl, MVT::i32, TGA);
- SDValue GP = DAG.getRegister(Mips::GP, MVT::i32);
- SDValue Argument = DAG.getNode(ISD::ADD, dl, MVT::i32, GP, Tlsgd);
+ bool LocalDynamic = GV->hasInternalLinkage();
+ unsigned Flag = LocalDynamic ? MipsII::MO_TLSLDM :MipsII::MO_TLSGD;
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, Flag);
+ SDValue Argument = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, TGA);
+ unsigned PtrSize = PtrVT.getSizeInBits();
+ IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize);
+
+ SDValue TlsGetAddr = DAG.getExternalSymbol("__tls_get_addr", PtrVT);
ArgListTy Args;
ArgListEntry Entry;
Entry.Node = Argument;
- Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
+ Entry.Ty = PtrTy;
Args.push_back(Entry);
+
std::pair<SDValue, SDValue> CallResult =
- LowerCallTo(DAG.getEntryNode(),
- (Type *) Type::getInt32Ty(*DAG.getContext()),
- false, false, false, false, 0, CallingConv::C, false, true,
- DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG,
- dl);
-
- return CallResult.first;
+ LowerCallTo(DAG.getEntryNode(), PtrTy,
+ false, false, false, false, 0, CallingConv::C, false, true,
+ TlsGetAddr, Args, DAG, dl);
+
+ SDValue Ret = CallResult.first;
+
+ if (!LocalDynamic)
+ return Ret;
+
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ MipsII::MO_DTPREL_HI);
+ SDValue Hi = DAG.getNode(MipsISD::Hi, dl, PtrVT, TGAHi);
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ MipsII::MO_DTPREL_LO);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, TGALo);
+ SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Ret);
+ return DAG.getNode(ISD::ADD, dl, PtrVT, Add, Lo);
}
SDValue Offset;
if (GV->isDeclaration()) {
// Initial Exec TLS Model
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
MipsII::MO_GOTTPREL);
- Offset = DAG.getLoad(MVT::i32, dl,
+ TGA = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, TGA);
+ Offset = DAG.getLoad(PtrVT, dl,
DAG.getEntryNode(), TGA, MachinePointerInfo(),
false, false, false, 0);
} else {
// Local Exec TLS Model
- SDVTList VTs = DAG.getVTList(MVT::i32);
- SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
MipsII::MO_TPREL_HI);
- SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
MipsII::MO_TPREL_LO);
- SDValue Hi = DAG.getNode(MipsISD::TprelHi, dl, VTs, &TGAHi, 1);
- SDValue Lo = DAG.getNode(MipsISD::TprelLo, dl, MVT::i32, TGALo);
- Offset = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo);
+ SDValue Hi = DAG.getNode(MipsISD::Hi, dl, PtrVT, TGAHi);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, TGALo);
+ Offset = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
}
SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, dl, PtrVT);
@@ -1594,34 +1613,29 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
SDValue MipsTargetLowering::
LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
{
- SDValue ResNode;
- SDValue HiPart;
+ SDValue HiPart, JTI, JTILo;
// FIXME there isn't actually debug info here
DebugLoc dl = Op.getDebugLoc();
bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
- unsigned char OpFlag = IsPIC ? MipsII::MO_GOT : MipsII::MO_ABS_HI;
-
EVT PtrVT = Op.getValueType();
- JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);
-
- if (!IsPIC) {
- SDValue Ops[] = { JTI };
- HiPart = DAG.getNode(MipsISD::Hi, dl, DAG.getVTList(MVT::i32), Ops, 1);
+ if (!IsPIC && !IsN64) {
+ JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MipsII::MO_ABS_HI);
+ HiPart = DAG.getNode(MipsISD::Hi, dl, PtrVT, JTI);
+ JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MipsII::MO_ABS_LO);
} else {// Emit Load from Global Pointer
- JTI = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, JTI);
- HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI,
- MachinePointerInfo(),
- false, false, false, 0);
+ unsigned GOTFlag = IsN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
+ unsigned OfstFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
+ JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, GOTFlag);
+ JTI = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, JTI);
+ HiPart = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), JTI,
+ MachinePointerInfo(), false, false, false, 0);
+ JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OfstFlag);
}
- SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
- MipsII::MO_ABS_LO);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTILo);
- ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
-
- return ResNode;
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, JTILo);
+ return DAG.getNode(ISD::ADD, dl, PtrVT, HiPart, Lo);
}
SDValue MipsTargetLowering::
@@ -1657,7 +1671,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
unsigned OFSTFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
SDValue CP = DAG.getTargetConstantPool(C, ValTy, N->getAlignment(),
N->getOffset(), GOTFlag);
- CP = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, CP);
+ CP = DAG.getNode(MipsISD::Wrapper, dl, ValTy, CP);
SDValue Load = DAG.getLoad(ValTy, dl, DAG.getEntryNode(),
CP, MachinePointerInfo::getConstantPool(),
false, false, false, 0);
@@ -1685,21 +1699,29 @@ SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(SV),
false, false, 0);
}
-
-static SDValue LowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG) {
+
+// Called if the size of integer registers is large enough to hold the whole
+// floating point number.
+static SDValue LowerFCOPYSIGNLargeIntReg(SDValue Op, SelectionDAG &DAG) {
// FIXME: Use ext/ins instructions if target architecture is Mips32r2.
+ EVT ValTy = Op.getValueType();
+ EVT IntValTy = MVT::getIntegerVT(ValTy.getSizeInBits());
+ uint64_t Mask = (uint64_t)1 << (ValTy.getSizeInBits() - 1);
DebugLoc dl = Op.getDebugLoc();
- SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(0));
- SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(1));
- SDValue And0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op0,
- DAG.getConstant(0x7fffffff, MVT::i32));
- SDValue And1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op1,
- DAG.getConstant(0x80000000, MVT::i32));
- SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, And0, And1);
- return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Result);
+ SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntValTy, Op.getOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntValTy, Op.getOperand(1));
+ SDValue And0 = DAG.getNode(ISD::AND, dl, IntValTy, Op0,
+ DAG.getConstant(Mask - 1, IntValTy));
+ SDValue And1 = DAG.getNode(ISD::AND, dl, IntValTy, Op1,
+ DAG.getConstant(Mask, IntValTy));
+ SDValue Result = DAG.getNode(ISD::OR, dl, IntValTy, And0, And1);
+ return DAG.getNode(ISD::BITCAST, dl, ValTy, Result);
}
-static SDValue LowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool isLittle) {
+// Called if the size of integer registers is not large enough to hold the whole
+// floating point number (e.g. f64 & 32-bit integer register).
+static SDValue
+LowerFCOPYSIGNSmallIntReg(SDValue Op, SelectionDAG &DAG, bool isLittle) {
// FIXME:
// Use ext/ins instructions if target architecture is Mips32r2.
// Eliminate redundant mfc1 and mtc1 instructions.
@@ -1734,10 +1756,10 @@ SDValue MipsTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG)
assert(Ty == MVT::f32 || Ty == MVT::f64);
- if (Ty == MVT::f32)
- return LowerFCOPYSIGN32(Op, DAG);
+ if (Ty == MVT::f32 || HasMips64)
+ return LowerFCOPYSIGNLargeIntReg(Op, DAG);
else
- return LowerFCOPYSIGN64(Op, DAG, Subtarget->isLittle());
+ return LowerFCOPYSIGNSmallIntReg(Op, DAG, Subtarget->isLittle());
}
SDValue MipsTargetLowering::
@@ -2328,7 +2350,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
// node so that legalize doesn't hack it.
unsigned char OpFlag;
bool IsPICCall = (IsN64 || IsPIC); // true if calls are translated to jalr $25
- bool LoadSymAddr = false;
+ bool GlobalOrExternal = false;
SDValue CalleeLo;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
@@ -2345,7 +2367,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
getPointerTy(), 0, OpFlag);
}
- LoadSymAddr = true;
+ GlobalOrExternal = true;
}
else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
if (IsN64 || (!IsO32 && IsPIC))
@@ -2356,16 +2378,16 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
OpFlag = MipsII::MO_GOT_CALL;
Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
getPointerTy(), OpFlag);
- LoadSymAddr = true;
+ GlobalOrExternal = true;
}
SDValue InFlag;
// Create nodes that load address of callee and copy it to T9
if (IsPICCall) {
- if (LoadSymAddr) {
+ if (GlobalOrExternal) {
// Load callee address
- Callee = DAG.getNode(MipsISD::WrapperPIC, dl, getPointerTy(), Callee);
+ Callee = DAG.getNode(MipsISD::Wrapper, dl, getPointerTy(), Callee);
SDValue LoadValue = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
Callee, MachinePointerInfo::getGOT(),
false, false, false, 0);
@@ -2377,7 +2399,11 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
} else
Callee = LoadValue;
}
+ }
+ // T9 should contain the address of the callee function if
+ // -relocation-model=pic or it is an indirect call.
+ if (IsPICCall || !GlobalOrExternal) {
// copy to T9
unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9;
Chain = DAG.getCopyToReg(Chain, dl, T9Reg, Callee, SDValue(0, 0));
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index f2b64e3..81d093f 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -40,13 +40,6 @@ namespace llvm {
// Handle gp_rel (small data/bss sections) relocation.
GPRel,
- // General Dynamic TLS
- TlsGd,
-
- // Local Exec TLS
- TprelHi,
- TprelLo,
-
// Thread Pointer
ThreadPointer,
@@ -79,7 +72,7 @@ namespace llvm {
BuildPairF64,
ExtractElementF64,
- WrapperPIC,
+ Wrapper,
DynAlloc,
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index e1725fa..21a1862 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -115,7 +115,7 @@ class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
let Inst{15-0} = imm16;
}
-class CBranchBase<bits<6> op, dag outs, dag ins, string asmstr,
+class BranchBase<bits<6> op, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
MipsInst<outs, ins, asmstr, pattern, itin, FrmI>
{
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 5358dc0..ea101f7 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -29,8 +29,8 @@ using namespace llvm;
MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm)
: MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
TM(tm), IsN64(TM.getSubtarget<MipsSubtarget>().isABI_N64()),
- RI(*TM.getSubtargetImpl(), *this) {}
-
+ RI(*TM.getSubtargetImpl(), *this),
+ UncondBrOpc(TM.getRelocationModel() == Reloc::PIC_ ? Mips::B : Mips::J) {}
const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const {
return RI;
@@ -236,7 +236,8 @@ static unsigned GetAnalyzableBrOpc(unsigned Opc) {
Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ ||
Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 ||
Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 ||
- Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::J) ?
+ Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B ||
+ Opc == Mips::J) ?
Opc : 0;
}
@@ -320,7 +321,7 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// If there is only one terminator instruction, process it.
if (!SecondLastOpc) {
// Unconditional branch
- if (LastOpc == Mips::J) {
+ if (LastOpc == UncondBrOpc) {
TBB = LastInst->getOperand(0).getMBB();
return false;
}
@@ -337,7 +338,7 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// If second to last instruction is an unconditional branch,
// analyze it and remove the last instruction.
- if (SecondLastOpc == Mips::J) {
+ if (SecondLastOpc == UncondBrOpc) {
// Return if the last instruction cannot be removed.
if (!AllowModify)
return true;
@@ -349,7 +350,7 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// Conditional branch followed by an unconditional branch.
// The last one must be unconditional.
- if (LastOpc != Mips::J)
+ if (LastOpc != UncondBrOpc)
return true;
AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond);
@@ -391,14 +392,14 @@ InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
// Two-way Conditional branch.
if (FBB) {
BuildCondBr(MBB, TBB, DL, Cond);
- BuildMI(&MBB, DL, get(Mips::J)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(UncondBrOpc)).addMBB(FBB);
return 2;
}
// One way branch.
// Unconditional branch.
if (Cond.empty())
- BuildMI(&MBB, DL, get(Mips::J)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(UncondBrOpc)).addMBB(TBB);
else // Conditional branch.
BuildCondBr(MBB, TBB, DL, Cond);
return 1;
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 8fa3052..70cc2cf 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -34,6 +34,7 @@ class MipsInstrInfo : public MipsGenInstrInfo {
MipsTargetMachine &TM;
bool IsN64;
const MipsRegisterInfo RI;
+ unsigned UncondBrOpc;
public:
explicit MipsInstrInfo(MipsTargetMachine &TM);
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 0ae94ab..9fcc5fd 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -107,7 +107,7 @@ def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem,
// movn %got(d)($gp), %got(c)($gp), $4
// This instruction is illegal since movn can take only register operands.
-def MipsWrapperPIC : SDNode<"MipsISD::WrapperPIC", SDTIntUnaryOp>;
+def MipsWrapper : SDNode<"MipsISD::Wrapper", SDTIntUnaryOp>;
// Pointer to dynamically allocated stack area.
def MipsDynAlloc : SDNode<"MipsISD::DynAlloc", SDT_MipsDynAlloc,
@@ -132,6 +132,8 @@ def NotMips64 : Predicate<"!Subtarget.hasMips64()">;
def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">;
def IsN64 : Predicate<"Subtarget.isABI_N64()">;
def NotN64 : Predicate<"!Subtarget.isABI_N64()">;
+def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
+def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">;
//===----------------------------------------------------------------------===//
// Mips Operand, Complex Patterns and Transformations Definitions.
@@ -194,12 +196,12 @@ def size_ins : Operand<i32> {
// Transformation Function - get the lower 16 bits.
def LO16 : SDNodeXForm<imm, [{
- return getI32Imm((unsigned)N->getZExtValue() & 0xFFFF);
+ return getImm(N, N->getZExtValue() & 0xFFFF);
}]>;
// Transformation Function - get the higher 16 bits.
def HI16 : SDNodeXForm<imm, [{
- return getI32Imm((unsigned)N->getZExtValue() >> 16);
+ return getImm(N, (N->getZExtValue() >> 16) & 0xFFFF);
}]>;
// Node immediate fits as 16-bit sign extended on target immediate.
@@ -380,21 +382,13 @@ class StoreM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC,
let isPseudo = Pseudo;
}
-// Memory Load/Store
+// Unaligned Memory Load/Store
let canFoldAsLoad = 1 in
-class LoadX<bits<6> op, RegisterClass RC,
- Operand MemOpnd>:
- FMem<op, (outs RC:$rt), (ins MemOpnd:$addr),
- "",
- [], IILoad> {
-}
+class LoadUnAlign<bits<6> op, RegisterClass RC, Operand MemOpnd>:
+ FMem<op, (outs RC:$rt), (ins MemOpnd:$addr), "", [], IILoad> {}
-class StoreX<bits<6> op, RegisterClass RC,
- Operand MemOpnd>:
- FMem<op, (outs), (ins RC:$rt, MemOpnd:$addr),
- "",
- [], IIStore> {
-}
+class StoreUnAlign<bits<6> op, RegisterClass RC, Operand MemOpnd>:
+ FMem<op, (outs), (ins RC:$rt, MemOpnd:$addr), "", [], IIStore> {}
// 32-bit load.
multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode,
@@ -415,10 +409,10 @@ multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode,
}
// 32-bit load.
-multiclass LoadX32<bits<6> op> {
- def #NAME# : LoadX<op, CPURegs, mem>,
+multiclass LoadUnAlign32<bits<6> op> {
+ def #NAME# : LoadUnAlign<op, CPURegs, mem>,
Requires<[NotN64]>;
- def _P8 : LoadX<op, CPURegs, mem64>,
+ def _P8 : LoadUnAlign<op, CPURegs, mem64>,
Requires<[IsN64]>;
}
// 32-bit store.
@@ -440,18 +434,18 @@ multiclass StoreM64<bits<6> op, string instr_asm, PatFrag OpNode,
}
// 32-bit store.
-multiclass StoreX32<bits<6> op> {
- def #NAME# : StoreX<op, CPURegs, mem>,
+multiclass StoreUnAlign32<bits<6> op> {
+ def #NAME# : StoreUnAlign<op, CPURegs, mem>,
Requires<[NotN64]>;
- def _P8 : StoreX<op, CPURegs, mem64>,
+ def _P8 : StoreUnAlign<op, CPURegs, mem64>,
Requires<[IsN64]>;
}
// Conditional Branch
class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>:
- CBranchBase<op, (outs), (ins RC:$rs, RC:$rt, brtarget:$imm16),
- !strconcat(instr_asm, "\t$rs, $rt, $imm16"),
- [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$imm16)], IIBranch> {
+ BranchBase<op, (outs), (ins RC:$rs, RC:$rt, brtarget:$imm16),
+ !strconcat(instr_asm, "\t$rs, $rt, $imm16"),
+ [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$imm16)], IIBranch> {
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 1;
@@ -459,9 +453,9 @@ class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>:
class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op,
RegisterClass RC>:
- CBranchBase<op, (outs), (ins RC:$rs, brtarget:$imm16),
- !strconcat(instr_asm, "\t$rs, $imm16"),
- [(brcond (i32 (cond_op RC:$rs, 0)), bb:$imm16)], IIBranch> {
+ BranchBase<op, (outs), (ins RC:$rs, brtarget:$imm16),
+ !strconcat(instr_asm, "\t$rs, $imm16"),
+ [(brcond (i32 (cond_op RC:$rs, 0)), bb:$imm16)], IIBranch> {
let rt = _rt;
let isBranch = 1;
let isTerminator = 1;
@@ -485,11 +479,29 @@ class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op, Operand Od,
[(set CPURegs:$rt, (cond_op RC:$rs, imm_type:$imm16))],
IIAlu>;
-// Unconditional branch
-let isBranch=1, isTerminator=1, isBarrier=1, hasDelaySlot = 1 in
+// Jump
class JumpFJ<bits<6> op, string instr_asm>:
FJ<op, (outs), (ins jmptarget:$target),
- !strconcat(instr_asm, "\t$target"), [(br bb:$target)], IIBranch>;
+ !strconcat(instr_asm, "\t$target"), [(br bb:$target)], IIBranch> {
+ let isBranch=1;
+ let isTerminator=1;
+ let isBarrier=1;
+ let hasDelaySlot = 1;
+ let Predicates = [RelocStatic];
+}
+
+// Unconditional branch
+class UncondBranch<bits<6> op, string instr_asm>:
+ BranchBase<op, (outs), (ins brtarget:$imm16),
+ !strconcat(instr_asm, "\t$imm16"), [(br bb:$imm16)], IIBranch> {
+ let rs = 0;
+ let rt = 0;
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let hasDelaySlot = 1;
+ let Predicates = [RelocPIC];
+}
let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1,
isIndirectBranch = 1 in
@@ -616,21 +628,37 @@ class ByteSwap<bits<6> func, bits<5> sa, string instr_asm>:
}
// Read Hardware
-class ReadHardware: FR<0x1f, 0x3b, (outs CPURegs:$rt), (ins HWRegs:$rd),
- "rdhwr\t$rt, $rd", [], IIAlu> {
+class ReadHardware<RegisterClass CPURegClass, RegisterClass HWRegClass>
+ : FR<0x1f, 0x3b, (outs CPURegClass:$rt), (ins HWRegClass:$rd),
+ "rdhwr\t$rt, $rd", [], IIAlu> {
let rs = 0;
let shamt = 0;
}
// Ext and Ins
-class ExtIns<bits<6> _funct, string instr_asm, dag outs, dag ins,
- list<dag> pattern, InstrItinClass itin>:
- FR<0x1f, _funct, outs, ins, !strconcat(instr_asm, " $rt, $rs, $pos, $sz"),
- pattern, itin>, Requires<[HasMips32r2]> {
+class ExtBase<bits<6> _funct, string instr_asm, RegisterClass RC>:
+ FR<0x1f, _funct, (outs RC:$rt), (ins RC:$rs, uimm16:$pos, size_ext:$sz),
+ !strconcat(instr_asm, " $rt, $rs, $pos, $sz"),
+ [(set RC:$rt, (MipsExt RC:$rs, imm:$pos, imm:$sz))], NoItinerary> {
bits<5> pos;
bits<5> sz;
let rd = sz;
let shamt = pos;
+ let Predicates = [HasMips32r2];
+}
+
+class InsBase<bits<6> _funct, string instr_asm, RegisterClass RC>:
+ FR<0x1f, _funct, (outs RC:$rt),
+ (ins RC:$rs, uimm16:$pos, size_ins:$sz, RC:$src),
+ !strconcat(instr_asm, " $rt, $rs, $pos, $sz"),
+ [(set RC:$rt, (MipsIns RC:$rs, imm:$pos, imm:$sz, RC:$src))],
+ NoItinerary> {
+ bits<5> pos;
+ bits<5> sz;
+ let rd = sz;
+ let shamt = pos;
+ let Predicates = [HasMips32r2];
+ let Constraints = "$src = $rt";
}
// Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*).
@@ -795,10 +823,10 @@ defm USH : StoreM32<0x29, "ush", truncstorei16_u, 1>;
defm USW : StoreM32<0x2b, "usw", store_u, 1>;
/// Primitives for unaligned
-defm LWL : LoadX32<0x22>;
-defm LWR : LoadX32<0x26>;
-defm SWL : StoreX32<0x2A>;
-defm SWR : StoreX32<0x2E>;
+defm LWL : LoadUnAlign32<0x22>;
+defm LWR : LoadUnAlign32<0x26>;
+defm SWL : StoreUnAlign32<0x2A>;
+defm SWR : StoreUnAlign32<0x2E>;
let hasSideEffects = 1 in
def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype",
@@ -822,6 +850,7 @@ def J : JumpFJ<0x02, "j">;
def JR : JumpFR<0x00, 0x08, "jr", CPURegs>;
def JAL : JumpLink<0x03, "jal">;
def JALR : JumpLinkReg<0x00, 0x09, "jalr">;
+def B : UncondBranch<0x04, "b">;
def BEQ : CBranch<0x04, "beq", seteq, CPURegs>;
def BNE : CBranch<0x05, "bne", setne, CPURegs>;
def BGEZ : CBranchZero<0x01, 1, "bgez", setge, CPURegs>;
@@ -888,21 +917,10 @@ def MSUBU : MArithR<5, "msubu", MipsMSubu>;
def MUL : ArithLogicR<0x1c, 0x02, "mul", mul, IIImul, CPURegs, 1>,
Requires<[HasMips32]>;
-def RDHWR : ReadHardware;
-
-def EXT : ExtIns<0, "ext", (outs CPURegs:$rt),
- (ins CPURegs:$rs, uimm16:$pos, size_ext:$sz),
- [(set CPURegs:$rt,
- (MipsExt CPURegs:$rs, immZExt5:$pos, immZExt5:$sz))],
- NoItinerary>;
+def RDHWR : ReadHardware<CPURegs, HWRegs>;
-let Constraints = "$src = $rt" in
-def INS : ExtIns<4, "ins", (outs CPURegs:$rt),
- (ins CPURegs:$rs, uimm16:$pos, size_ins:$sz, CPURegs:$src),
- [(set CPURegs:$rt,
- (MipsIns CPURegs:$rs, immZExt5:$pos, immZExt5:$sz,
- CPURegs:$src))],
- NoItinerary>;
+def EXT : ExtBase<0, "ext", CPURegs>;
+def INS : InsBase<4, "ins", CPURegs>;
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
@@ -939,11 +957,13 @@ def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
def : Pat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>;
def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>;
+def : Pat<(MipsHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>;
def : Pat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>;
def : Pat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>;
def : Pat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>;
def : Pat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>;
+def : Pat<(MipsLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>;
def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)),
(ADDiu CPURegs:$hi, tglobaladdr:$lo)>;
@@ -953,6 +973,8 @@ def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)),
(ADDiu CPURegs:$hi, tjumptable:$lo)>;
def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)),
(ADDiu CPURegs:$hi, tconstpool:$lo)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tglobaltlsaddr:$lo)),
+ (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>;
// gp_rel relocs
def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)),
@@ -960,26 +982,17 @@ def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)),
def : Pat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)),
(ADDiu CPURegs:$gp, tconstpool:$in)>;
-// tlsgd
-def : Pat<(add CPURegs:$gp, (MipsTlsGd tglobaltlsaddr:$in)),
- (ADDiu CPURegs:$gp, tglobaltlsaddr:$in)>;
-
-// tprel hi/lo
-def : Pat<(MipsTprelHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>;
-def : Pat<(MipsTprelLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>;
-def : Pat<(add CPURegs:$hi, (MipsTprelLo tglobaltlsaddr:$lo)),
- (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>;
-
// wrapper_pic
-class WrapperPICPat<SDNode node>:
- Pat<(MipsWrapperPIC node:$in),
- (ADDiu GP, node:$in)>;
-
-def : WrapperPICPat<tglobaladdr>;
-def : WrapperPICPat<tconstpool>;
-def : WrapperPICPat<texternalsym>;
-def : WrapperPICPat<tblockaddress>;
-def : WrapperPICPat<tjumptable>;
+class WrapperPat<SDNode node, Instruction ADDiuOp, Register GPReg>:
+ Pat<(MipsWrapper node:$in),
+ (ADDiuOp GPReg, node:$in)>;
+
+def : WrapperPat<tglobaladdr, ADDiu, GP>;
+def : WrapperPat<tconstpool, ADDiu, GP>;
+def : WrapperPat<texternalsym, ADDiu, GP>;
+def : WrapperPat<tblockaddress, ADDiu, GP>;
+def : WrapperPat<tjumptable, ADDiu, GP>;
+def : WrapperPat<tglobaltlsaddr, ADDiu, GP>;
// Mips does not have "not", so we expand our way
def : Pat<(not CPURegs:$in),
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index 6fc2af1..23486d3 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -41,10 +41,14 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case MipsII::MO_NO_FLAG: Kind = MCSymbolRefExpr::VK_None; break;
case MipsII::MO_GPREL: Kind = MCSymbolRefExpr::VK_Mips_GPREL; break;
case MipsII::MO_GOT_CALL: Kind = MCSymbolRefExpr::VK_Mips_GOT_CALL; break;
+ case MipsII::MO_GOT16: Kind = MCSymbolRefExpr::VK_Mips_GOT16; break;
case MipsII::MO_GOT: Kind = MCSymbolRefExpr::VK_Mips_GOT; break;
case MipsII::MO_ABS_HI: Kind = MCSymbolRefExpr::VK_Mips_ABS_HI; break;
case MipsII::MO_ABS_LO: Kind = MCSymbolRefExpr::VK_Mips_ABS_LO; break;
case MipsII::MO_TLSGD: Kind = MCSymbolRefExpr::VK_Mips_TLSGD; break;
+ case MipsII::MO_TLSLDM: Kind = MCSymbolRefExpr::VK_Mips_TLSLDM; break;
+ case MipsII::MO_DTPREL_HI:Kind = MCSymbolRefExpr::VK_Mips_DTPREL_HI; break;
+ case MipsII::MO_DTPREL_LO:Kind = MCSymbolRefExpr::VK_Mips_DTPREL_LO; break;
case MipsII::MO_GOTTPREL: Kind = MCSymbolRefExpr::VK_Mips_GOTTPREL; break;
case MipsII::MO_TPREL_HI: Kind = MCSymbolRefExpr::VK_Mips_TPREL_HI; break;
case MipsII::MO_TPREL_LO: Kind = MCSymbolRefExpr::VK_Mips_TPREL_LO; break;
@@ -136,14 +140,35 @@ void MipsMCInstLower::LowerCPLOAD(const MachineInstr *MI,
}
// Lower ".cprestore offset" to "sw $gp, offset($sp)".
-void MipsMCInstLower::LowerCPRESTORE(const MachineInstr *MI, MCInst &OutMI) {
- OutMI.clear();
- OutMI.setOpcode(Mips::SW);
- OutMI.addOperand(MCOperand::CreateReg(Mips::GP));
- OutMI.addOperand(MCOperand::CreateReg(Mips::SP));
+void MipsMCInstLower::LowerCPRESTORE(const MachineInstr *MI,
+ SmallVector<MCInst, 4>& MCInsts) {
const MachineOperand &MO = MI->getOperand(0);
assert(MO.isImm() && "CPRESTORE's operand must be an immediate.");
- OutMI.addOperand(MCOperand::CreateImm(MO.getImm()));
+ unsigned Offset = MO.getImm(), Reg = Mips::SP;
+ MCInst Sw;
+
+ if (Offset >= 0x8000) {
+ unsigned Hi = (Offset >> 16) + ((Offset & 0x8000) != 0);
+ Offset &= 0xffff;
+ Reg = Mips::AT;
+
+ // lui at,hi
+ // addu at,at,sp
+ MCInsts.resize(2);
+ MCInsts[0].setOpcode(Mips::LUi);
+ MCInsts[0].addOperand(MCOperand::CreateReg(Mips::AT));
+ MCInsts[0].addOperand(MCOperand::CreateImm(Hi));
+ MCInsts[1].setOpcode(Mips::ADDu);
+ MCInsts[1].addOperand(MCOperand::CreateReg(Mips::AT));
+ MCInsts[1].addOperand(MCOperand::CreateReg(Mips::AT));
+ MCInsts[1].addOperand(MCOperand::CreateReg(Mips::SP));
+ }
+
+ Sw.setOpcode(Mips::SW);
+ Sw.addOperand(MCOperand::CreateReg(Mips::GP));
+ Sw.addOperand(MCOperand::CreateReg(Reg));
+ Sw.addOperand(MCOperand::CreateImm(Offset));
+ MCInsts.push_back(Sw);
}
MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO,
diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h
index 98e37e4..1490c14 100644
--- a/lib/Target/Mips/MipsMCInstLower.h
+++ b/lib/Target/Mips/MipsMCInstLower.h
@@ -36,7 +36,7 @@ public:
MipsAsmPrinter &asmprinter);
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
void LowerCPLOAD(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts);
- void LowerCPRESTORE(const MachineInstr *MI, MCInst &OutMI);
+ void LowerCPRESTORE(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts);
void LowerUnalignedLoadStore(const MachineInstr *MI,
SmallVector<MCInst, 4>& MCInsts);
private:
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 06c4a66..e5a0f08 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -125,6 +125,7 @@ getRegisterNumbering(unsigned RegEnum)
case Mips::D14:
return 28;
case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64:
+ case Mips::HWR29:
return 29;
case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64:
case Mips::D15:
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 925ad9e..76ee2e6 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -239,6 +239,7 @@ let Namespace = "Mips" in {
// Hardware register $29
def HWR29 : Register<"29">;
+ def HWR29_64 : Register<"29">;
}
//===----------------------------------------------------------------------===//
@@ -301,3 +302,5 @@ def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)> {
// Hardware registers
def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>;
+def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>;
+
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 016d449..dc299f2 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -31,7 +31,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
{
std::string CPUName = CPU;
if (CPUName.empty())
- CPUName = "mips32r1";
+ CPUName = "mips32";
// Parse features string.
ParseSubtargetFeatures(CPUName, FS);
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 5d6b24f..02887fa 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -34,51 +34,51 @@ extern "C" void LLVMInitializeMipsTarget() {
// Using CodeModel::Large enables different CALL behavior.
MipsTargetMachine::
MipsTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
- bool isLittle):
- LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
- Subtarget(TT, CPU, FS, isLittle),
- DataLayout(isLittle ?
- (Subtarget.isABI_N64() ?
- "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
- "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") :
- (Subtarget.isABI_N64() ?
- "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
- "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")),
- InstrInfo(*this),
- FrameLowering(Subtarget),
- TLInfo(*this), TSInfo(*this), JITInfo() {
+ bool isLittle)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, isLittle),
+ DataLayout(isLittle ?
+ (Subtarget.isABI_N64() ?
+ "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
+ "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") :
+ (Subtarget.isABI_N64() ?
+ "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
+ "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")),
+ InstrInfo(*this),
+ FrameLowering(Subtarget),
+ TLInfo(*this), TSInfo(*this), JITInfo() {
}
MipsebTargetMachine::
MipsebTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL) :
- MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {}
+ CodeGenOpt::Level OL)
+ : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
MipselTargetMachine::
MipselTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL) :
- MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {}
+ CodeGenOpt::Level OL)
+ : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
Mips64ebTargetMachine::
Mips64ebTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL) :
- MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {}
+ CodeGenOpt::Level OL)
+ : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
Mips64elTargetMachine::
Mips64elTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL) :
- MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {}
+ CodeGenOpt::Level OL)
+ : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
// Install an instruction selector pass using
// the ISelDag to gen Mips code.
@@ -120,4 +120,3 @@ bool MipsTargetMachine::addCodeEmitter(PassManagerBase &PM,
PM.add(createMipsJITCodeEmitterPass(*this, JCE));
return false;
}
-
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index e40d9e2..6842373 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -38,7 +38,7 @@ namespace llvm {
public:
MipsTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool isLittle);
@@ -82,7 +82,7 @@ namespace llvm {
class MipsebTargetMachine : public MipsTargetMachine {
public:
MipsebTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
@@ -92,7 +92,7 @@ public:
class MipselTargetMachine : public MipsTargetMachine {
public:
MipselTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
@@ -103,6 +103,7 @@ class Mips64ebTargetMachine : public MipsTargetMachine {
public:
Mips64ebTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
@@ -113,6 +114,7 @@ class Mips64elTargetMachine : public MipsTargetMachine {
public:
Mips64elTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
diff --git a/lib/Target/Mips/TargetInfo/CMakeLists.txt b/lib/Target/Mips/TargetInfo/CMakeLists.txt
index 5692604..4172d00 100644
--- a/lib/Target/Mips/TargetInfo/CMakeLists.txt
+++ b/lib/Target/Mips/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMMipsInfo
MipsTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMMipsInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMMipsInfo MipsCommonTableGen)
diff --git a/lib/Target/Mips/TargetInfo/LLVMBuild.txt b/lib/Target/Mips/TargetInfo/LLVMBuild.txt
index 90ae260..2d42568 100644
--- a/lib/Target/Mips/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/Mips/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MipsInfo
parent = Mips
required_libraries = MC Support Target
add_to_library_groups = Mips
-
diff --git a/lib/Target/PTX/CMakeLists.txt b/lib/Target/PTX/CMakeLists.txt
index 6709c1b..a9f4330 100644
--- a/lib/Target/PTX/CMakeLists.txt
+++ b/lib/Target/PTX/CMakeLists.txt
@@ -25,20 +25,6 @@ add_llvm_target(PTXCodeGen
PTXTargetMachine.cpp
)
-add_llvm_library_dependencies(LLVMPTXCodeGen
- LLVMAnalysis
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMPTXDesc
- LLVMPTXInfo
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- )
-
add_subdirectory(TargetInfo)
add_subdirectory(InstPrinter)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/PTX/InstPrinter/CMakeLists.txt b/lib/Target/PTX/InstPrinter/CMakeLists.txt
index 029d060..b252893 100644
--- a/lib/Target/PTX/InstPrinter/CMakeLists.txt
+++ b/lib/Target/PTX/InstPrinter/CMakeLists.txt
@@ -6,8 +6,3 @@ add_llvm_library(LLVMPTXAsmPrinter
add_dependencies(LLVMPTXAsmPrinter PTXCommonTableGen)
-add_llvm_library_dependencies(LLVMPTXAsmPrinter
- LLVMMC
- LLVMSupport
- )
-
diff --git a/lib/Target/PTX/InstPrinter/LLVMBuild.txt b/lib/Target/PTX/InstPrinter/LLVMBuild.txt
index be89c10..af5d200 100644
--- a/lib/Target/PTX/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/PTX/InstPrinter/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = PTXAsmPrinter
parent = PTX
required_libraries = MC Support
add_to_library_groups = PTX
-
diff --git a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp
index 2f6c92d..5fecb85 100644
--- a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp
+++ b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp
@@ -38,7 +38,50 @@ StringRef PTXInstPrinter::getOpcodeName(unsigned Opcode) const {
}
void PTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- OS << getRegisterName(RegNo);
+ // Decode the register number into space, type, and offset
+ unsigned RegSpace = RegNo & 0x7;
+ unsigned RegType = (RegNo >> 3) & 0x7;
+ unsigned RegOffset = RegNo >> 6;
+
+ // Print the register
+ OS << "%";
+
+ switch (RegSpace) {
+ default:
+ llvm_unreachable("Unknown register space!");
+ case PTXRegisterSpace::Reg:
+ switch (RegType) {
+ default:
+ llvm_unreachable("Unknown register type!");
+ case PTXRegisterType::Pred:
+ OS << "p";
+ break;
+ case PTXRegisterType::B16:
+ OS << "rh";
+ break;
+ case PTXRegisterType::B32:
+ OS << "r";
+ break;
+ case PTXRegisterType::B64:
+ OS << "rd";
+ break;
+ case PTXRegisterType::F32:
+ OS << "f";
+ break;
+ case PTXRegisterType::F64:
+ OS << "fd";
+ break;
+ }
+ break;
+ case PTXRegisterSpace::Return:
+ OS << "ret";
+ break;
+ case PTXRegisterSpace::Argument:
+ OS << "arg";
+ break;
+ }
+
+ OS << RegOffset;
}
void PTXInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
@@ -139,6 +182,8 @@ void PTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
} else {
O << "0000000000000000";
}
+ } else if (Op.isReg()) {
+ printRegName(O, Op.getReg());
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
const MCExpr *Expr = Op.getExpr();
diff --git a/lib/Target/PTX/LLVMBuild.txt b/lib/Target/PTX/LLVMBuild.txt
index 22c70de..15a1eb5 100644
--- a/lib/Target/PTX/LLVMBuild.txt
+++ b/lib/Target/PTX/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = InstPrinter MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = PTX
@@ -27,4 +30,3 @@ name = PTXCodeGen
parent = PTX
required_libraries = Analysis AsmPrinter CodeGen Core MC PTXDesc PTXInfo SelectionDAG Support Target TransformUtils
add_to_library_groups = PTX
-
diff --git a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
index 94dbcee..d1fd74c 100644
--- a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
@@ -3,11 +3,4 @@ add_llvm_library(LLVMPTXDesc
PTXMCAsmInfo.cpp
)
-add_llvm_library_dependencies(LLVMPTXDesc
- LLVMMC
- LLVMPTXAsmPrinter
- LLVMPTXInfo
- LLVMSupport
- )
-
add_dependencies(LLVMPTXDesc PTXCommonTableGen)
diff --git a/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt b/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt
index fff21c1..19b80c5 100644
--- a/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = PTXDesc
parent = PTX
required_libraries = MC PTXAsmPrinter PTXInfo Support
add_to_library_groups = PTX
-
diff --git a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
index c6094be..77a298d 100644
--- a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
+++ b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
@@ -17,6 +17,8 @@
#ifndef PTXBASEINFO_H
#define PTXBASEINFO_H
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "PTXMCTargetDesc.h"
namespace llvm {
@@ -57,6 +59,75 @@ namespace llvm {
RndPosInfInt = 10 // .rpi
};
} // namespace PTXII
+
+ namespace PTXRegisterType {
+ // Register type encoded in MCOperands
+ enum {
+ Pred = 0,
+ B16,
+ B32,
+ B64,
+ F32,
+ F64
+ };
+ } // namespace PTXRegisterType
+
+ namespace PTXRegisterSpace {
+ // Register space encoded in MCOperands
+ enum {
+ Reg = 0,
+ Local,
+ Param,
+ Argument,
+ Return
+ };
+ }
+
+ inline static void decodeRegisterName(raw_ostream &OS,
+ unsigned EncodedReg) {
+ OS << "%";
+
+ unsigned RegSpace = EncodedReg & 0x7;
+ unsigned RegType = (EncodedReg >> 3) & 0x7;
+ unsigned RegOffset = EncodedReg >> 6;
+
+ switch (RegSpace) {
+ default:
+ llvm_unreachable("Unknown register space!");
+ case PTXRegisterSpace::Reg:
+ switch (RegType) {
+ default:
+ llvm_unreachable("Unknown register type!");
+ case PTXRegisterType::Pred:
+ OS << "p";
+ break;
+ case PTXRegisterType::B16:
+ OS << "rh";
+ break;
+ case PTXRegisterType::B32:
+ OS << "r";
+ break;
+ case PTXRegisterType::B64:
+ OS << "rd";
+ break;
+ case PTXRegisterType::F32:
+ OS << "f";
+ break;
+ case PTXRegisterType::F64:
+ OS << "fd";
+ break;
+ }
+ break;
+ case PTXRegisterSpace::Return:
+ OS << "ret";
+ break;
+ case PTXRegisterSpace::Argument:
+ OS << "arg";
+ break;
+ }
+
+ OS << RegOffset;
+ }
} // namespace llvm
#endif
diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp
index bdf238b..77ed71d 100644
--- a/lib/Target/PTX/PTXAsmPrinter.cpp
+++ b/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -51,23 +51,23 @@ using namespace llvm;
static const char PARAM_PREFIX[] = "__param_";
static const char RETURN_PREFIX[] = "__ret_";
-static const char *getRegisterTypeName(unsigned RegNo,
- const MachineRegisterInfo& MRI) {
- const TargetRegisterClass *TRC = MRI.getRegClass(RegNo);
-
-#define TEST_REGCLS(cls, clsstr) \
- if (PTX::cls ## RegisterClass == TRC) return # clsstr;
-
- TEST_REGCLS(RegPred, pred);
- TEST_REGCLS(RegI16, b16);
- TEST_REGCLS(RegI32, b32);
- TEST_REGCLS(RegI64, b64);
- TEST_REGCLS(RegF32, b32);
- TEST_REGCLS(RegF64, b64);
-#undef TEST_REGCLS
-
- llvm_unreachable("Not in any register class!");
- return NULL;
+static const char *getRegisterTypeName(unsigned RegType) {
+ switch (RegType) {
+ default:
+ llvm_unreachable("Unknown register type");
+ case PTXRegisterType::Pred:
+ return ".pred";
+ case PTXRegisterType::B16:
+ return ".b16";
+ case PTXRegisterType::B32:
+ return ".b32";
+ case PTXRegisterType::B64:
+ return ".b64";
+ case PTXRegisterType::F32:
+ return ".f32";
+ case PTXRegisterType::F64:
+ return ".f64";
+ }
}
static const char *getStateSpaceName(unsigned addressSpace) {
@@ -188,32 +188,32 @@ void PTXAsmPrinter::EmitFunctionBodyStart() {
unsigned numRegs;
// pred
- numRegs = MFI->getNumRegistersForClass(PTX::RegPredRegisterClass);
+ numRegs = MFI->countRegisters(PTXRegisterType::Pred, PTXRegisterSpace::Reg);
if(numRegs > 0)
os << "\t.reg .pred %p<" << numRegs << ">;\n";
// i16
- numRegs = MFI->getNumRegistersForClass(PTX::RegI16RegisterClass);
+ numRegs = MFI->countRegisters(PTXRegisterType::B16, PTXRegisterSpace::Reg);
if(numRegs > 0)
os << "\t.reg .b16 %rh<" << numRegs << ">;\n";
// i32
- numRegs = MFI->getNumRegistersForClass(PTX::RegI32RegisterClass);
+ numRegs = MFI->countRegisters(PTXRegisterType::B32, PTXRegisterSpace::Reg);
if(numRegs > 0)
os << "\t.reg .b32 %r<" << numRegs << ">;\n";
// i64
- numRegs = MFI->getNumRegistersForClass(PTX::RegI64RegisterClass);
+ numRegs = MFI->countRegisters(PTXRegisterType::B64, PTXRegisterSpace::Reg);
if(numRegs > 0)
os << "\t.reg .b64 %rd<" << numRegs << ">;\n";
// f32
- numRegs = MFI->getNumRegistersForClass(PTX::RegF32RegisterClass);
+ numRegs = MFI->countRegisters(PTXRegisterType::F32, PTXRegisterSpace::Reg);
if(numRegs > 0)
os << "\t.reg .f32 %f<" << numRegs << ">;\n";
// f64
- numRegs = MFI->getNumRegistersForClass(PTX::RegF64RegisterClass);
+ numRegs = MFI->countRegisters(PTXRegisterType::F64, PTXRegisterSpace::Reg);
if(numRegs > 0)
os << "\t.reg .f64 %fd<" << numRegs << ">;\n";
@@ -368,7 +368,6 @@ void PTXAsmPrinter::EmitFunctionEntryLabel() {
const PTXParamManager &PM = MFI->getParamManager();
const bool isKernel = MFI->isKernel();
const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
- const MachineRegisterInfo& MRI = MF->getRegInfo();
SmallString<128> decl;
raw_svector_ostream os(decl);
@@ -391,7 +390,7 @@ void PTXAsmPrinter::EmitFunctionEntryLabel() {
if (i != b)
os << ", ";
- os << ".reg ." << getRegisterTypeName(*i, MRI) << ' '
+ os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' '
<< MFI->getRegisterName(*i);
}
}
@@ -450,7 +449,7 @@ void PTXAsmPrinter::EmitFunctionEntryLabel() {
if (i != b)
os << ", ";
- os << ".reg ." << getRegisterTypeName(*i, MRI) << ' '
+ os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' '
<< MFI->getRegisterName(*i);
}
}
@@ -521,20 +520,18 @@ MCOperand PTXAsmPrinter::GetSymbolRef(const MachineOperand &MO,
MCOperand PTXAsmPrinter::lowerOperand(const MachineOperand &MO) {
MCOperand MCOp;
const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
- const MCExpr *Expr;
- const char *RegSymbolName;
+ unsigned EncodedReg;
switch (MO.getType()) {
default:
llvm_unreachable("Unknown operand type");
case MachineOperand::MO_Register:
- // We create register operands as symbols, since the PTXInstPrinter class
- // has no way to map virtual registers back to a name without some ugly
- // hacks.
- // FIXME: Figure out a better way to handle virtual register naming.
- RegSymbolName = MFI->getRegisterName(MO.getReg());
- Expr = MCSymbolRefExpr::Create(RegSymbolName, MCSymbolRefExpr::VK_None,
- OutContext);
- MCOp = MCOperand::CreateExpr(Expr);
+ if (MO.getReg() > 0) {
+ // Encode the register
+ EncodedReg = MFI->getEncodedRegister(MO.getReg());
+ } else {
+ EncodedReg = 0;
+ }
+ MCOp = MCOperand::CreateReg(EncodedReg);
break;
case MachineOperand::MO_Immediate:
MCOp = MCOperand::CreateImm(MO.getImm());
diff --git a/lib/Target/PTX/PTXFPRoundingModePass.cpp b/lib/Target/PTX/PTXFPRoundingModePass.cpp
index 0b653e0..a21d172 100644
--- a/lib/Target/PTX/PTXFPRoundingModePass.cpp
+++ b/lib/Target/PTX/PTXFPRoundingModePass.cpp
@@ -23,9 +23,11 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
// NOTE: PTXFPRoundingModePass should be executed just before emission.
-namespace llvm {
+namespace {
/// PTXFPRoundingModePass - Pass to assign appropriate FP rounding modes to
/// all FP instructions. Essentially, this pass just looks for all FP
/// instructions that have a rounding mode set to RndDefault, and sets an
@@ -58,7 +60,7 @@ namespace llvm {
void initializeMap();
void processInstruction(MachineInstr &MI);
}; // class PTXFPRoundingModePass
-} // namespace llvm
+} // end anonymous namespace
using namespace llvm;
diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp
index 17191fb..a012297 100644
--- a/lib/Target/PTX/PTXISelLowering.cpp
+++ b/lib/Target/PTX/PTXISelLowering.cpp
@@ -243,6 +243,30 @@ SDValue PTXTargetLowering::
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
EVT RegVT = Ins[i].VT;
TargetRegisterClass* TRC = getRegClassFor(RegVT);
+ unsigned RegType;
+
+ // Determine which register type we need
+ if (RegVT == MVT::i1) {
+ RegType = PTXRegisterType::Pred;
+ }
+ else if (RegVT == MVT::i16) {
+ RegType = PTXRegisterType::B16;
+ }
+ else if (RegVT == MVT::i32) {
+ RegType = PTXRegisterType::B32;
+ }
+ else if (RegVT == MVT::i64) {
+ RegType = PTXRegisterType::B64;
+ }
+ else if (RegVT == MVT::f32) {
+ RegType = PTXRegisterType::F32;
+ }
+ else if (RegVT == MVT::f64) {
+ RegType = PTXRegisterType::F64;
+ }
+ else {
+ llvm_unreachable("Unknown parameter type");
+ }
// Use a unique index in the instruction to prevent instruction folding.
// Yes, this is a hack.
@@ -253,7 +277,7 @@ SDValue PTXTargetLowering::
InVals.push_back(ArgValue);
- MFI->addArgReg(Reg);
+ MFI->addRegister(Reg, RegType, PTXRegisterSpace::Argument);
}
}
@@ -304,25 +328,32 @@ SDValue PTXTargetLowering::
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
EVT RegVT = Outs[i].VT;
TargetRegisterClass* TRC = 0;
+ unsigned RegType;
// Determine which register class we need
if (RegVT == MVT::i1) {
TRC = PTX::RegPredRegisterClass;
+ RegType = PTXRegisterType::Pred;
}
else if (RegVT == MVT::i16) {
TRC = PTX::RegI16RegisterClass;
+ RegType = PTXRegisterType::B16;
}
else if (RegVT == MVT::i32) {
TRC = PTX::RegI32RegisterClass;
+ RegType = PTXRegisterType::B32;
}
else if (RegVT == MVT::i64) {
TRC = PTX::RegI64RegisterClass;
+ RegType = PTXRegisterType::B64;
}
else if (RegVT == MVT::f32) {
TRC = PTX::RegF32RegisterClass;
+ RegType = PTXRegisterType::F32;
}
else if (RegVT == MVT::f64) {
TRC = PTX::RegF64RegisterClass;
+ RegType = PTXRegisterType::F64;
}
else {
llvm_unreachable("Unknown parameter type");
@@ -335,7 +366,7 @@ SDValue PTXTargetLowering::
Chain = DAG.getNode(PTXISD::WRITE_PARAM, dl, MVT::Other, Copy, OutReg);
- MFI->addRetReg(Reg);
+ MFI->addRegister(Reg, RegType, PTXRegisterSpace::Return);
}
}
diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp
index 1b947a5..871b3a7 100644
--- a/lib/Target/PTX/PTXInstrInfo.cpp
+++ b/lib/Target/PTX/PTXInstrInfo.cpp
@@ -116,7 +116,7 @@ bool PTXInstrInfo::isPredicated(const MachineInstr *MI) const {
}
bool PTXInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- return !isPredicated(MI) && get(MI->getOpcode()).isTerminator();
+ return !isPredicated(MI) && MI->isTerminator();
}
bool PTXInstrInfo::
@@ -184,15 +184,13 @@ AnalyzeBranch(MachineBasicBlock &MBB,
if (MBB.empty())
return true;
- MachineBasicBlock::const_iterator iter = MBB.end();
+ MachineBasicBlock::iterator iter = MBB.end();
const MachineInstr& instLast1 = *--iter;
- const MCInstrDesc &desc1 = instLast1.getDesc();
// for special case that MBB has only 1 instruction
const bool IsSizeOne = MBB.size() == 1;
// if IsSizeOne is true, *--iter and instLast2 are invalid
- // we put a dummy value in instLast2 and desc2 since they are used
+ // we put a dummy value in instLast2 since it is used
const MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter;
- const MCInstrDesc &desc2 = IsSizeOne ? desc1 : instLast2.getDesc();
DEBUG(dbgs() << "\n");
DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n");
@@ -207,7 +205,7 @@ AnalyzeBranch(MachineBasicBlock &MBB,
}
// this block ends with only an unconditional branch
- if (desc1.isUnconditionalBranch() &&
+ if (instLast1.isUnconditionalBranch() &&
// when IsSizeOne is true, it "absorbs" the evaluation of instLast2
(IsSizeOne || !IsAnyKindOfBranch(instLast2))) {
DEBUG(dbgs() << "AnalyzeBranch: ends with only uncond branch\n");
@@ -217,7 +215,7 @@ AnalyzeBranch(MachineBasicBlock &MBB,
// this block ends with a conditional branch and
// it falls through to a successor block
- if (desc1.isConditionalBranch() &&
+ if (instLast1.isConditionalBranch() &&
IsAnySuccessorAlsoLayoutSuccessor(MBB)) {
DEBUG(dbgs() << "AnalyzeBranch: ends with cond branch and fall through\n");
TBB = GetBranchTarget(instLast1);
@@ -233,8 +231,8 @@ AnalyzeBranch(MachineBasicBlock &MBB,
// this block ends with a conditional branch
// followed by an unconditional branch
- if (desc2.isConditionalBranch() &&
- desc1.isUnconditionalBranch()) {
+ if (instLast2.isConditionalBranch() &&
+ instLast1.isUnconditionalBranch()) {
DEBUG(dbgs() << "AnalyzeBranch: ends with cond and uncond branch\n");
TBB = GetBranchTarget(instLast2);
FBB = GetBranchTarget(instLast1);
@@ -341,8 +339,7 @@ void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) {
}
bool PTXInstrInfo::IsAnyKindOfBranch(const MachineInstr& inst) {
- const MCInstrDesc &desc = inst.getDesc();
- return desc.isTerminator() || desc.isBranch() || desc.isIndirectBranch();
+ return inst.isTerminator() || inst.isBranch() || inst.isIndirectBranch();
}
bool PTXInstrInfo::
diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td
index bcd5bcf..19a862f 100644
--- a/lib/Target/PTX/PTXInstrInfo.td
+++ b/lib/Target/PTX/PTXInstrInfo.td
@@ -825,17 +825,17 @@ let hasSideEffects = 1 in {
///===- Parameter Passing Pseudo-Instructions -----------------------------===//
def READPARAMPRED : InstPTX<(outs RegPred:$a), (ins i32imm:$b),
- "mov.pred\t$a, %param$b", []>;
+ "mov.pred\t$a, %arg$b", []>;
def READPARAMI16 : InstPTX<(outs RegI16:$a), (ins i32imm:$b),
- "mov.b16\t$a, %param$b", []>;
+ "mov.b16\t$a, %arg$b", []>;
def READPARAMI32 : InstPTX<(outs RegI32:$a), (ins i32imm:$b),
- "mov.b32\t$a, %param$b", []>;
+ "mov.b32\t$a, %arg$b", []>;
def READPARAMI64 : InstPTX<(outs RegI64:$a), (ins i32imm:$b),
- "mov.b64\t$a, %param$b", []>;
+ "mov.b64\t$a, %arg$b", []>;
def READPARAMF32 : InstPTX<(outs RegF32:$a), (ins i32imm:$b),
- "mov.f32\t$a, %param$b", []>;
+ "mov.f32\t$a, %arg$b", []>;
def READPARAMF64 : InstPTX<(outs RegF64:$a), (ins i32imm:$b),
- "mov.f64\t$a, %param$b", []>;
+ "mov.f64\t$a, %arg$b", []>;
def WRITEPARAMPRED : InstPTX<(outs), (ins RegPred:$a), "//w", []>;
def WRITEPARAMI16 : InstPTX<(outs), (ins RegI16:$a), "//w", []>;
diff --git a/lib/Target/PTX/PTXMFInfoExtract.cpp b/lib/Target/PTX/PTXMFInfoExtract.cpp
index b33a273..26ec623 100644
--- a/lib/Target/PTX/PTXMFInfoExtract.cpp
+++ b/lib/Target/PTX/PTXMFInfoExtract.cpp
@@ -22,9 +22,11 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
// NOTE: PTXMFInfoExtract must after register allocation!
-namespace llvm {
+namespace {
/// PTXMFInfoExtract - PTX specific code to extract of PTX machine
/// function information for PTXAsmPrinter
///
@@ -42,7 +44,7 @@ namespace llvm {
return "PTX Machine Function Info Extractor";
}
}; // class PTXMFInfoExtract
-} // namespace llvm
+} // end anonymous namespace
using namespace llvm;
@@ -56,7 +58,25 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
for (unsigned i = 0; i < MRI.getNumVirtRegs(); ++i) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
- MFI->addVirtualRegister(TRC, Reg);
+ // Translate the register class into the PTXRegisterType value that
+ // addRegister() records for this virtual register.
+ unsigned RegType;
+ if (TRC == PTX::RegPredRegisterClass)
+ RegType = PTXRegisterType::Pred;
+ else if (TRC == PTX::RegI16RegisterClass)
+ RegType = PTXRegisterType::B16;
+ else if (TRC == PTX::RegI32RegisterClass)
+ RegType = PTXRegisterType::B32;
+ else if (TRC == PTX::RegI64RegisterClass)
+ RegType = PTXRegisterType::B64;
+ else if (TRC == PTX::RegF32RegisterClass)
+ RegType = PTXRegisterType::F32;
+ else if (TRC == PTX::RegF64RegisterClass)
+ RegType = PTXRegisterType::F64;
+ else
+ // Any other class would leave RegType uninitialized; fail loudly instead.
+ llvm_unreachable("Invalid register class");
+ MFI->addRegister(Reg, RegType, PTXRegisterSpace::Reg);
}
return false;
diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.h b/lib/Target/PTX/PTXMachineFunctionInfo.h
index 3b985f7..1a2878c 100644
--- a/lib/Target/PTX/PTXMachineFunctionInfo.h
+++ b/lib/Target/PTX/PTXMachineFunctionInfo.h
@@ -35,15 +35,22 @@ private:
DenseSet<unsigned> RegArgs;
DenseSet<unsigned> RegRets;
- typedef std::vector<unsigned> RegisterList;
- typedef DenseMap<const TargetRegisterClass*, RegisterList> RegisterMap;
- typedef DenseMap<unsigned, std::string> RegisterNameMap;
typedef DenseMap<int, std::string> FrameMap;
- RegisterMap UsedRegs;
- RegisterNameMap RegNames;
FrameMap FrameSymbols;
+ struct RegisterInfo {
+ unsigned Reg;
+ unsigned Type;
+ unsigned Space;
+ unsigned Offset;
+ unsigned Encoded;
+ };
+
+ typedef DenseMap<unsigned, RegisterInfo> RegisterInfoMap;
+
+ RegisterInfoMap RegInfo;
+
PTXParamManager ParamManager;
public:
@@ -51,13 +58,7 @@ public:
PTXMachineFunctionInfo(MachineFunction &MF)
: IsKernel(false) {
- UsedRegs[PTX::RegPredRegisterClass] = RegisterList();
- UsedRegs[PTX::RegI16RegisterClass] = RegisterList();
- UsedRegs[PTX::RegI32RegisterClass] = RegisterList();
- UsedRegs[PTX::RegI64RegisterClass] = RegisterList();
- UsedRegs[PTX::RegF32RegisterClass] = RegisterList();
- UsedRegs[PTX::RegF64RegisterClass] = RegisterList();
- }
+ }
/// getParamManager - Returns the PTXParamManager instance for this function.
PTXParamManager& getParamManager() { return ParamManager; }
@@ -78,69 +79,106 @@ public:
reg_iterator retreg_begin() const { return RegRets.begin(); }
reg_iterator retreg_end() const { return RegRets.end(); }
+ /// addRegister - Adds a virtual register to the set of all used registers
+ void addRegister(unsigned Reg, unsigned RegType, unsigned RegSpace) {
+ if (!RegInfo.count(Reg)) {
+ RegisterInfo Info;
+ Info.Reg = Reg;
+ Info.Type = RegType;
+ Info.Space = RegSpace;
+
+ // Determine register offset
+ Info.Offset = 0;
+ for(RegisterInfoMap::const_iterator i = RegInfo.begin(),
+ e = RegInfo.end(); i != e; ++i) {
+ const RegisterInfo& RI = i->second;
+ if (RI.Space == RegSpace)
+ if (RI.Space != PTXRegisterSpace::Reg || RI.Type == Info.Type)
+ Info.Offset++;
+ }
+
+ // Encode the register data into a single register number
+ Info.Encoded = (Info.Offset << 6) | (Info.Type << 3) | Info.Space;
+
+ RegInfo[Reg] = Info;
+
+ if (RegSpace == PTXRegisterSpace::Argument)
+ RegArgs.insert(Reg);
+ else if (RegSpace == PTXRegisterSpace::Return)
+ RegRets.insert(Reg);
+ }
+ }
+
+ /// countRegisters - Returns the number of registers of the given type and
+ /// space.
+ unsigned countRegisters(unsigned RegType, unsigned RegSpace) const {
+ unsigned Count = 0;
+ for(RegisterInfoMap::const_iterator i = RegInfo.begin(), e = RegInfo.end();
+ i != e; ++i) {
+ const RegisterInfo& RI = i->second;
+ if (RI.Type == RegType && RI.Space == RegSpace)
+ Count++;
+ }
+ return Count;
+ }
+
+ /// getEncodedRegister - Returns the encoded value of the register.
+ unsigned getEncodedRegister(unsigned Reg) const {
+ return RegInfo.lookup(Reg).Encoded;
+ }
+
/// addRetReg - Adds a register to the set of return-value registers.
void addRetReg(unsigned Reg) {
if (!RegRets.count(Reg)) {
RegRets.insert(Reg);
- std::string name;
- name = "%ret";
- name += utostr(RegRets.size() - 1);
- RegNames[Reg] = name;
}
}
/// addArgReg - Adds a register to the set of function argument registers.
void addArgReg(unsigned Reg) {
RegArgs.insert(Reg);
- std::string name;
- name = "%param";
- name += utostr(RegArgs.size() - 1);
- RegNames[Reg] = name;
- }
-
- /// addVirtualRegister - Adds a virtual register to the set of all used
- /// registers in the function.
- void addVirtualRegister(const TargetRegisterClass *TRC, unsigned Reg) {
- std::string name;
-
- // Do not count registers that are argument/return registers.
- if (!RegRets.count(Reg) && !RegArgs.count(Reg)) {
- UsedRegs[TRC].push_back(Reg);
- if (TRC == PTX::RegPredRegisterClass)
- name = "%p";
- else if (TRC == PTX::RegI16RegisterClass)
- name = "%rh";
- else if (TRC == PTX::RegI32RegisterClass)
- name = "%r";
- else if (TRC == PTX::RegI64RegisterClass)
- name = "%rd";
- else if (TRC == PTX::RegF32RegisterClass)
- name = "%f";
- else if (TRC == PTX::RegF64RegisterClass)
- name = "%fd";
- else
- llvm_unreachable("Invalid register class");
-
- name += utostr(UsedRegs[TRC].size() - 1);
- RegNames[Reg] = name;
- }
}
/// getRegisterName - Returns the name of the specified virtual register. This
/// name is used during PTX emission.
- const char *getRegisterName(unsigned Reg) const {
- if (RegNames.count(Reg))
- return RegNames.find(Reg)->second.c_str();
+ std::string getRegisterName(unsigned Reg) const {
+ if (RegInfo.count(Reg)) {
+ const RegisterInfo& RI = RegInfo.lookup(Reg);
+ std::string Name;
+ raw_string_ostream NameStr(Name);
+ decodeRegisterName(NameStr, RI.Encoded);
+ NameStr.flush();
+ return Name;
+ }
else if (Reg == PTX::NoRegister)
return "%noreg";
else
llvm_unreachable("Register not in register name map");
}
- /// getNumRegistersForClass - Returns the number of virtual registers that are
- /// used for the specified register class.
- unsigned getNumRegistersForClass(const TargetRegisterClass *TRC) const {
- return UsedRegs.lookup(TRC).size();
+ /// getEncodedRegisterName - Returns the name of the encoded register.
+ std::string getEncodedRegisterName(unsigned EncodedReg) const {
+ std::string Name;
+ raw_string_ostream NameStr(Name);
+ decodeRegisterName(NameStr, EncodedReg);
+ NameStr.flush();
+ return Name;
+ }
+
+ /// getRegisterType - Returns the type of the specified virtual register.
+ unsigned getRegisterType(unsigned Reg) const {
+ if (RegInfo.count(Reg))
+ return RegInfo.lookup(Reg).Type;
+ else
+ llvm_unreachable("Unknown register");
+ }
+
+ /// getOffsetForRegister - Returns the offset of the virtual register
+ unsigned getOffsetForRegister(unsigned Reg) const {
+ if (RegInfo.count(Reg))
+ return RegInfo.lookup(Reg).Offset;
+ else
+ return 0;
}
/// getFrameSymbol - Returns the symbol name for the given FrameIndex.
@@ -148,13 +186,13 @@ public:
if (FrameSymbols.count(FrameIndex)) {
return FrameSymbols.lookup(FrameIndex).c_str();
} else {
- std::string Name = "__local";
- Name += utostr(FrameIndex);
+ std::string Name = "__local";
+ Name += utostr(FrameIndex);
// The whole point of caching this name is to ensure the pointer we pass
// to any getExternalSymbol() calls will remain valid for the lifetime of
// the back-end instance. This is to work around an issue in SelectionDAG
// where symbol names are expected to be life-long strings.
- FrameSymbols[FrameIndex] = Name;
+ FrameSymbols[FrameIndex] = Name;
return FrameSymbols[FrameIndex].c_str();
}
}
diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp
index 292ea5e..4efdc27 100644
--- a/lib/Target/PTX/PTXTargetMachine.cpp
+++ b/lib/Target/PTX/PTXTargetMachine.cpp
@@ -67,30 +67,16 @@ namespace {
"e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
const char* DataLayout64 =
"e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
-
- // Copied from LLVMTargetMachine.cpp
- void printNoVerify(PassManagerBase &PM, const char *Banner) {
- if (PrintMachineCode)
- PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
- }
-
- void printAndVerify(PassManagerBase &PM,
- const char *Banner) {
- if (PrintMachineCode)
- PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
-
- //if (VerifyMachineCode)
- // PM.add(createMachineVerifierPass(Banner));
- }
}
// DataLayout and FrameLowering are filled with dummy data
PTXTargetMachine::PTXTargetMachine(const Target &T,
StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
DataLayout(is64Bit ? DataLayout64 : DataLayout32),
Subtarget(TT, CPU, FS, is64Bit),
FrameLowering(Subtarget),
@@ -101,16 +87,18 @@ PTXTargetMachine::PTXTargetMachine(const Target &T,
PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : PTXTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {
+ : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
}
PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : PTXTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {
+ : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
}
bool PTXTargetMachine::addInstSelector(PassManagerBase &PM) {
diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h
index 19f6c0f..22911f7 100644
--- a/lib/Target/PTX/PTXTargetMachine.h
+++ b/lib/Target/PTX/PTXTargetMachine.h
@@ -35,7 +35,7 @@ class PTXTargetMachine : public LLVMTargetMachine {
public:
PTXTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool is64Bit);
@@ -94,7 +94,7 @@ class PTX32TargetMachine : public PTXTargetMachine {
public:
PTX32TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
}; // class PTX32TargetMachine
@@ -103,7 +103,7 @@ class PTX64TargetMachine : public PTXTargetMachine {
public:
PTX64TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
}; // class PTX32TargetMachine
diff --git a/lib/Target/PTX/TargetInfo/CMakeLists.txt b/lib/Target/PTX/TargetInfo/CMakeLists.txt
index 2366e45..d9a5da3 100644
--- a/lib/Target/PTX/TargetInfo/CMakeLists.txt
+++ b/lib/Target/PTX/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMPTXInfo
PTXTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMPTXInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMPTXInfo PTXCommonTableGen)
diff --git a/lib/Target/PTX/TargetInfo/LLVMBuild.txt b/lib/Target/PTX/TargetInfo/LLVMBuild.txt
index 8e5285a..2cc30c4 100644
--- a/lib/Target/PTX/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/PTX/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = PTXInfo
parent = PTX
required_libraries = MC Support Target
add_to_library_groups = PTX
-
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index 05c1ffd..1b85495 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -27,20 +27,6 @@ add_llvm_target(PowerPCCodeGen
PPCSelectionDAGInfo.cpp
)
-add_llvm_library_dependencies(LLVMPowerPCCodeGen
- LLVMAnalysis
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMPowerPCAsmPrinter
- LLVMPowerPCDesc
- LLVMPowerPCInfo
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
index 1d857e2..a605cc4 100644
--- a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
+++ b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
@@ -4,9 +4,4 @@ add_llvm_library(LLVMPowerPCAsmPrinter
PPCInstPrinter.cpp
)
-add_llvm_library_dependencies(LLVMPowerPCAsmPrinter
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMPowerPCAsmPrinter PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt b/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt
index afbb2b1..7c691de 100644
--- a/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = PowerPCAsmPrinter
parent = PowerPC
required_libraries = MC Support
add_to_library_groups = PowerPC
-
diff --git a/lib/Target/PowerPC/LLVMBuild.txt b/lib/Target/PowerPC/LLVMBuild.txt
index 5baa988..95fac54 100644
--- a/lib/Target/PowerPC/LLVMBuild.txt
+++ b/lib/Target/PowerPC/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = InstPrinter MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = PowerPC
@@ -28,4 +31,3 @@ name = PowerPCCodeGen
parent = PowerPC
required_libraries = Analysis AsmPrinter CodeGen Core MC PowerPCAsmPrinter PowerPCDesc PowerPCInfo SelectionDAG Support Target
add_to_library_groups = PowerPC
-
diff --git a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
index c4041db..febf438 100644
--- a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
@@ -6,11 +6,4 @@ add_llvm_library(LLVMPowerPCDesc
PPCPredicates.cpp
)
-add_llvm_library_dependencies(LLVMPowerPCDesc
- LLVMMC
- LLVMPowerPCAsmPrinter
- LLVMPowerPCInfo
- LLVMSupport
- )
-
add_dependencies(LLVMPowerPCDesc PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt b/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt
index fc2da83..d3a567d 100644
--- a/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = PowerPCDesc
parent = PowerPC
required_libraries = MC PowerPCAsmPrinter PowerPCInfo Support
add_to_library_groups = PowerPC
-
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 9f2fd6d..34a5774 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -93,6 +93,16 @@ public:
// FIXME.
return false;
}
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // FIXME.
+ assert(0 && "RelaxInstruction() unimplemented");
+ return false;
+ }
+
void RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
// FIXME.
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 56f622e..5dc2d3d 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -365,11 +365,12 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case PPC::MFCRpseud:
+ case PPC::MFCR8pseud:
// Transform: %R3 = MFCRpseud %CR7
// Into: %R3 = MFCR ;; cr7
OutStreamer.AddComment(PPCInstPrinter::
getRegisterName(MI->getOperand(1).getReg()));
- TmpInst.setOpcode(PPC::MFCR);
+ TmpInst.setOpcode(Subtarget.isPPC64() ? PPC::MFCR8 : PPC::MFCR);
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
OutStreamer.EmitInstruction(TmpInst);
return;
@@ -441,7 +442,7 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
Directive = PPC::DIR_970;
if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400)
Directive = PPC::DIR_7400;
- if (Subtarget.isPPC64() && Directive < PPC::DIR_970)
+ if (Subtarget.isPPC64() && Directive < PPC::DIR_64)
Directive = PPC::DIR_64;
assert(Directive <= PPC::DIR_64 && "Directive out of range.");
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index 4a1f182..9d2f4d0 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -138,7 +138,8 @@ void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI,
unsigned OpNo) const {
const MachineOperand &MO = MI.getOperand(OpNo);
- assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) &&
+ assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MTCRF8 ||
+ MI.getOpcode() == PPC::MFOCRF) &&
(MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
return 0x80 >> getPPCRegisterNumbering(MO.getReg());
}
@@ -248,7 +249,8 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
if (MO.isReg()) {
// MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
// The GPR operand should come through here though.
- assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) ||
+ assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MTCRF8 &&
+ MI.getOpcode() != PPC::MFOCRF) ||
MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
return getPPCRegisterNumbering(MO.getReg());
}
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 0b85fea..5c45018 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -64,7 +64,7 @@ static void RemoveVRSaveCode(MachineInstr *MI) {
// epilog blocks.
for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
// If last instruction is a return instruction, add an epilogue
- if (!I->empty() && I->back().getDesc().isReturn()) {
+ if (!I->empty() && I->back().isReturn()) {
bool FoundIt = false;
for (MBBI = I->end(); MBBI != I->begin(); ) {
--MBBI;
@@ -244,8 +244,10 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
if (MF.getFunction()->hasFnAttr(Attribute::Naked))
return false;
- return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() ||
- (GuaranteedTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall());
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects() ||
+ (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ MF.getInfo<PPCFunctionInfo>()->hasFastCall());
}
@@ -655,7 +657,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// Callee pop calling convention. Pop parameter/linkage area. Used for tail
// call optimization
- if (GuaranteedTailCallOpt && RetOpcode == PPC::BLR &&
+ if (MF.getTarget().Options.GuaranteedTailCallOpt && RetOpcode == PPC::BLR &&
MF.getFunction()->getCallingConv() == CallingConv::Fast) {
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
unsigned CallerAllocatedAmt = FI->getMinReservedArea();
@@ -758,7 +760,8 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Reserve stack space to move the linkage area to in case of a tail call.
int TCSPDelta = 0;
- if (GuaranteedTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
}
@@ -769,7 +772,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// FIXME: doesn't detect whether or not we need to spill vXX, which requires
// r0 for now.
- if (RegInfo->requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable.
+ if (RegInfo->requiresRegisterScavenging(MF))
if (needsFP(MF) || spillsCR(MF)) {
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
@@ -863,7 +866,8 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
// Take into account stack space reserved for tail calls.
int TCSPDelta = 0;
- if (GuaranteedTailCallOpt && (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
LowerBound = TCSPDelta;
}
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 3197fc8..ae317af 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -27,7 +27,6 @@ void PPCHazardRecognizer440::EmitInstruction(SUnit *SU) {
const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
if (!MCID) {
// This is a PPC pseudo-instruction.
- // FIXME: Should something else be done?
return;
}
@@ -62,6 +61,7 @@ void PPCHazardRecognizer440::EmitInstruction(SUnit *SU) {
PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii)
: TII(tii) {
+ LastWasBL8_ELF = false;
EndDispatchGroup();
}
@@ -80,12 +80,6 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
bool &isFirst, bool &isSingle,
bool &isCracked,
bool &isLoad, bool &isStore) {
- if ((int)Opcode >= 0) {
- isFirst = isSingle = isCracked = isLoad = isStore = false;
- return PPCII::PPC970_Pseudo;
- }
- Opcode = ~Opcode;
-
const MCInstrDesc &MCID = TII.get(Opcode);
isLoad = MCID.mayLoad();
@@ -102,29 +96,23 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
/// isLoadOfStoredAddress - If we have a load from the previously stored pointer
/// as indicated by StorePtr1/StorePtr2/StoreSize, return true.
bool PPCHazardRecognizer970::
-isLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const {
+isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset,
+ const Value *LoadValue) const {
for (unsigned i = 0, e = NumStores; i != e; ++i) {
// Handle exact and commuted addresses.
- if (Ptr1 == StorePtr1[i] && Ptr2 == StorePtr2[i])
- return true;
- if (Ptr2 == StorePtr1[i] && Ptr1 == StorePtr2[i])
+ if (LoadValue == StoreValue[i] && LoadOffset == StoreOffset[i])
return true;
// Okay, we don't have an exact match, if this is an indexed offset, see if
// we have overlap (which happens during fp->int conversion for example).
- if (StorePtr2[i] == Ptr2) {
- if (ConstantSDNode *StoreOffset = dyn_cast<ConstantSDNode>(StorePtr1[i]))
- if (ConstantSDNode *LoadOffset = dyn_cast<ConstantSDNode>(Ptr1)) {
- // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check
- // to see if the load and store actually overlap.
- int StoreOffs = StoreOffset->getZExtValue();
- int LoadOffs = LoadOffset->getZExtValue();
- if (StoreOffs < LoadOffs) {
- if (int(StoreOffs+StoreSize[i]) > LoadOffs) return true;
- } else {
- if (int(LoadOffs+LoadSize) > StoreOffs) return true;
- }
- }
+ if (StoreValue[i] == LoadValue) {
+ // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check
+ // to see if the load and store actually overlap.
+ if (StoreOffset[i] < LoadOffset) {
+ if (int64_t(StoreOffset[i]+StoreSize[i]) > LoadOffset) return true;
+ } else {
+ if (int64_t(LoadOffset+LoadSize) > StoreOffset[i]) return true;
+ }
}
}
return false;
@@ -138,13 +126,26 @@ ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970::
getHazardType(SUnit *SU, int Stalls) {
assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead");
- const SDNode *Node = SU->getNode()->getGluedMachineNode();
+ MachineInstr *MI = SU->getInstr();
+
+ if (MI->isDebugValue())
+ return NoHazard;
+
+ unsigned Opcode = MI->getOpcode();
+
+ // If the last instruction was a BL8_ELF, then the NOP must follow it
+ // directly (this is a strong requirement from the linker due to the ELF ABI).
+ // We return only Hazard (and not NoopHazard) because if the NOP is necessary
+ // then it will already be in the instruction stream (it is not always
+ // necessary; tail calls, for example, do not need it).
+ if (LastWasBL8_ELF && Opcode != PPC::NOP)
+ return Hazard;
+
bool isFirst, isSingle, isCracked, isLoad, isStore;
PPCII::PPC970_Unit InstrType =
- GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
+ GetInstrType(Opcode, isFirst, isSingle, isCracked,
isLoad, isStore);
if (InstrType == PPCII::PPC970_Pseudo) return NoHazard;
- unsigned Opcode = Node->getMachineOpcode();
// We can only issue a PPC970_First/PPC970_Single instruction (such as
// crand/mtspr/etc) if this is the first cycle of the dispatch group.
@@ -181,55 +182,10 @@ getHazardType(SUnit *SU, int Stalls) {
// If this is a load following a store, make sure it's not to the same or
// overlapping address.
- if (isLoad && NumStores) {
- unsigned LoadSize;
- switch (Opcode) {
- default: llvm_unreachable("Unknown load!");
- case PPC::LBZ: case PPC::LBZU:
- case PPC::LBZX:
- case PPC::LBZ8: case PPC::LBZU8:
- case PPC::LBZX8:
- case PPC::LVEBX:
- LoadSize = 1;
- break;
- case PPC::LHA: case PPC::LHAU:
- case PPC::LHAX:
- case PPC::LHZ: case PPC::LHZU:
- case PPC::LHZX:
- case PPC::LVEHX:
- case PPC::LHBRX:
- case PPC::LHA8: case PPC::LHAU8:
- case PPC::LHAX8:
- case PPC::LHZ8: case PPC::LHZU8:
- case PPC::LHZX8:
- LoadSize = 2;
- break;
- case PPC::LFS: case PPC::LFSU:
- case PPC::LFSX:
- case PPC::LWZ: case PPC::LWZU:
- case PPC::LWZX:
- case PPC::LWA:
- case PPC::LWAX:
- case PPC::LVEWX:
- case PPC::LWBRX:
- case PPC::LWZ8:
- case PPC::LWZX8:
- LoadSize = 4;
- break;
- case PPC::LFD: case PPC::LFDU:
- case PPC::LFDX:
- case PPC::LD: case PPC::LDU:
- case PPC::LDX:
- LoadSize = 8;
- break;
- case PPC::LVX:
- case PPC::LVXL:
- LoadSize = 16;
- break;
- }
-
- if (isLoadOfStoredAddress(LoadSize,
- Node->getOperand(0), Node->getOperand(1)))
+ if (isLoad && NumStores && !MI->memoperands_empty()) {
+ MachineMemOperand *MO = *MI->memoperands_begin();
+ if (isLoadOfStoredAddress(MO->getSize(),
+ MO->getOffset(), MO->getValue()))
return NoopHazard;
}
@@ -237,66 +193,29 @@ getHazardType(SUnit *SU, int Stalls) {
}
void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
- const SDNode *Node = SU->getNode()->getGluedMachineNode();
+ MachineInstr *MI = SU->getInstr();
+
+ if (MI->isDebugValue())
+ return;
+
+ unsigned Opcode = MI->getOpcode();
+ LastWasBL8_ELF = (Opcode == PPC::BL8_ELF);
+
bool isFirst, isSingle, isCracked, isLoad, isStore;
PPCII::PPC970_Unit InstrType =
- GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
+ GetInstrType(Opcode, isFirst, isSingle, isCracked,
isLoad, isStore);
if (InstrType == PPCII::PPC970_Pseudo) return;
- unsigned Opcode = Node->getMachineOpcode();
// Update structural hazard information.
if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true;
// Track the address stored to.
- if (isStore) {
- unsigned ThisStoreSize;
- switch (Opcode) {
- default: llvm_unreachable("Unknown store instruction!");
- case PPC::STB: case PPC::STB8:
- case PPC::STBU: case PPC::STBU8:
- case PPC::STBX: case PPC::STBX8:
- case PPC::STVEBX:
- ThisStoreSize = 1;
- break;
- case PPC::STH: case PPC::STH8:
- case PPC::STHU: case PPC::STHU8:
- case PPC::STHX: case PPC::STHX8:
- case PPC::STVEHX:
- case PPC::STHBRX:
- ThisStoreSize = 2;
- break;
- case PPC::STFS:
- case PPC::STFSU:
- case PPC::STFSX:
- case PPC::STWX: case PPC::STWX8:
- case PPC::STWUX:
- case PPC::STW: case PPC::STW8:
- case PPC::STWU:
- case PPC::STVEWX:
- case PPC::STFIWX:
- case PPC::STWBRX:
- ThisStoreSize = 4;
- break;
- case PPC::STD_32:
- case PPC::STDX_32:
- case PPC::STD:
- case PPC::STDU:
- case PPC::STFD:
- case PPC::STFDX:
- case PPC::STDX:
- case PPC::STDUX:
- ThisStoreSize = 8;
- break;
- case PPC::STVX:
- case PPC::STVXL:
- ThisStoreSize = 16;
- break;
- }
-
- StoreSize[NumStores] = ThisStoreSize;
- StorePtr1[NumStores] = Node->getOperand(1);
- StorePtr2[NumStores] = Node->getOperand(2);
+ if (isStore && NumStores < 4 && !MI->memoperands_empty()) {
+ MachineMemOperand *MO = *MI->memoperands_begin();
+ StoreSize[NumStores] = MO->getSize();
+ StoreOffset[NumStores] = MO->getOffset();
+ StoreValue[NumStores] = MO->getValue();
++NumStores;
}
@@ -319,3 +238,9 @@ void PPCHazardRecognizer970::AdvanceCycle() {
if (NumIssued == 5)
EndDispatchGroup();
}
+
+void PPCHazardRecognizer970::Reset() {
+ LastWasBL8_ELF = false;
+ EndDispatchGroup();
+}
+
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h
index 32fac91..95d0d64 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -49,14 +49,18 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
// HasCTRSet - If the CTR register is set in this group, disallow BCTRL.
bool HasCTRSet;
+ // Was the last instruction issued a BL8_ELF
+ bool LastWasBL8_ELF;
+
// StoredPtr - Keep track of the address of any store. If we see a load from
// the same address (or one that aliases it), disallow the store. We can have
// up to four stores in one dispatch group, hence we track up to 4.
//
// This is null if we haven't seen a store yet. We keep track of both
// operands of the store here, since we support [r+r] and [r+i] addressing.
- SDValue StorePtr1[4], StorePtr2[4];
- unsigned StoreSize[4];
+ const Value *StoreValue[4];
+ int64_t StoreOffset[4];
+ uint64_t StoreSize[4];
unsigned NumStores;
public:
@@ -64,6 +68,7 @@ public:
virtual HazardType getHazardType(SUnit *SU, int Stalls);
virtual void EmitInstruction(SUnit *SU);
virtual void AdvanceCycle();
+ virtual void Reset();
private:
/// EndDispatchGroup - Called when we are finishing a new dispatch group.
@@ -76,8 +81,8 @@ private:
bool &isFirst, bool &isSingle,bool &isCracked,
bool &isLoad, bool &isStore);
- bool isLoadOfStoredAddress(unsigned LoadSize,
- SDValue Ptr1, SDValue Ptr2) const;
+ bool isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset,
+ const Value *LoadValue) const;
};
} // end namespace llvm
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 3dee406..4a509a3 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -210,13 +210,13 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
// Find all return blocks, outputting a restore in each epilog.
for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
- if (!BB->empty() && BB->back().getDesc().isReturn()) {
+ if (!BB->empty() && BB->back().isReturn()) {
IP = BB->end(); --IP;
// Skip over all terminator instructions, which are part of the return
// sequence.
MachineBasicBlock::iterator I2 = IP;
- while (I2 != BB->begin() && (--I2)->getDesc().isTerminator())
+ while (I2 != BB->begin() && (--I2)->isTerminator())
IP = I2;
// Emit: MTVRSAVE InVRSave
@@ -1066,7 +1066,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Target = N->getOperand(1);
unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
- Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Target,
+ Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
Chain), 0);
return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 36d5c41..f3a3d17 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -103,6 +103,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// from FP_ROUND: that rounds to nearest, this rounds to zero.
setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
+ // We do not currently implement these libm ops for PowerPC.
+ setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
+
// PowerPC has no SREM/UREM instructions
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
@@ -146,9 +153,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
// PowerPC does not have ROTR
setOperationAction(ISD::ROTR, MVT::i32 , Expand);
@@ -332,7 +343,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
}
// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
@@ -1667,7 +1680,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Potential tail calls could cause overwriting of argument stack slots.
- bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast));
+ bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+ (CallConv == CallingConv::Fast));
unsigned PtrByteSize = 4;
// Assign locations to all of the incoming arguments.
@@ -1857,7 +1871,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
bool isPPC64 = PtrVT == MVT::i64;
// Potential tail calls could cause overwriting of argument stack slots.
- bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast));
+ bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+ (CallConv == CallingConv::Fast));
unsigned PtrByteSize = isPPC64 ? 8 : 4;
unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
@@ -2263,9 +2278,9 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
// Tail call needs the stack to be aligned.
- if (CC==CallingConv::Fast && GuaranteedTailCallOpt) {
- unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
- getStackAlignment();
+ if (CC == CallingConv::Fast && DAG.getTarget().Options.GuaranteedTailCallOpt){
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().
+ getFrameLowering()->getStackAlignment();
unsigned AlignMask = TargetAlign-1;
NumBytes = (NumBytes + AlignMask) & ~AlignMask;
}
@@ -2299,7 +2314,7 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const {
- if (!GuaranteedTailCallOpt)
+ if (!getTargetMachine().Options.GuaranteedTailCallOpt)
return false;
// Variable argument functions are not supported.
@@ -2752,7 +2767,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// the stack. Account for this here so these bytes can be pushed back on in
// PPCRegisterInfo::eliminateCallFramePseudoInstr.
int BytesCalleePops =
- (CallConv==CallingConv::Fast && GuaranteedTailCallOpt) ? NumBytes : 0;
+ (CallConv == CallingConv::Fast &&
+ getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
if (InFlag.getNode())
Ops.push_back(InFlag);
@@ -2868,7 +2884,8 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
// and restoring the callers stack pointer in this functions epilog. This is
// done because by tail calling the called function might overwrite the value
// in this function's (MF) stack pointer stack slot 0(SP).
- if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast)
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
// Count how many bytes are to be pushed on the stack, including the linkage
@@ -3075,7 +3092,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// and restoring the callers stack pointer in this functions epilog. This is
// done because by tail calling the called function might overwrite the value
// in this function's (MF) stack pointer stack slot 0(SP).
- if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast)
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
unsigned nAltivecParamsAtEnd = 0;
@@ -5754,7 +5772,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setFrameAddressIsTaken(true);
- bool is31 = (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()) &&
+ bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects()) &&
MFI->getStackSize() &&
!MF.getFunction()->hasFnAttr(Attribute::Naked);
unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index e88ad37..cdbc264 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -223,6 +223,18 @@ def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
(TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
+// 64-bit CR instructions
+def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins G8RC:$rS),
+ "mtcrf $FXM, $rS", BrMCRX>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+def MFCR8pseud: XFXForm_3<31, 19, (outs G8RC:$rT), (ins crbitm:$FXM),
+ "", SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins),
+ "mfcr $rT", SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
//===----------------------------------------------------------------------===//
// 64-bit SPR manipulation instrs.
@@ -469,6 +481,12 @@ def RLDICR : MDForm_1<30, 1,
(outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$ME),
"rldicr $rA, $rS, $SH, $ME", IntRotateD,
[]>, isPPC64;
+
+def RLWINM8 : MForm_2<21,
+ (outs G8RC:$rA), (ins G8RC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
+ []>;
+
} // End FXU Operations.
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index b9a6297..6d16f1d 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -33,8 +33,8 @@
#include "PPCGenInstrInfo.inc"
namespace llvm {
-extern cl::opt<bool> EnablePPC32RS; // FIXME (64-bit): See PPCRegisterInfo.cpp.
-extern cl::opt<bool> EnablePPC64RS; // FIXME (64-bit): See PPCRegisterInfo.cpp.
+extern cl::opt<bool> DisablePPC32RS;
+extern cl::opt<bool> DisablePPC64RS;
}
using namespace llvm;
@@ -48,25 +48,32 @@ PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
const TargetMachine *TM,
const ScheduleDAG *DAG) const {
- // Should use subtarget info to pick the right hazard recognizer. For
- // now, always return a PPC970 recognizer.
- const TargetInstrInfo *TII = TM->getInstrInfo();
- (void)TII;
- assert(TII && "No InstrInfo?");
-
unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective();
if (Directive == PPC::DIR_440) {
const InstrItineraryData *II = TM->getInstrItineraryData();
return new PPCHazardRecognizer440(II, DAG);
}
- else {
- // Disable the hazard recognizer for now, as it doesn't support
- // bottom-up scheduling.
- //return new PPCHazardRecognizer970(*TII);
- return new ScheduleHazardRecognizer();
- }
+
+ return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
}
+/// CreateTargetPostRAHazardRecognizer - Return the post-RA hazard recognizer: a
+/// new PPCHazardRecognizer970, or the TargetInstrInfoImpl default for DIR_440.
+ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
+ const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+
+ // Every directive other than DIR_440 gets a PPC970 recognizer.
+ if (Directive != PPC::DIR_440) {
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ assert(TII && "No InstrInfo?");
+
+ return new PPCHazardRecognizer970(*TII);
+ }
+
+ return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG); // DIR_440: defer to the base class
+}
unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
switch (MI->getOpcode()) {
@@ -338,6 +345,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
}
+// This function returns true if a CR spill is necessary and false otherwise.
bool
PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
unsigned SrcReg, bool isKill,
@@ -369,7 +377,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
FrameIdx));
} else {
// FIXME: this spills LR immediately to memory in one step. To do this,
- // we use R11, which we know cannot be used in the prolog/epilog. This is
+ // we use X11, which we know cannot be used in the prolog/epilog. This is
// a hack.
NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR8), PPC::X11));
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
@@ -388,9 +396,8 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
} else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) {
- if ((EnablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
- (EnablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
- // FIXME (64-bit): Enable
+ if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
+ (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
.addReg(SrcReg,
getKillRegState(isKill)),
@@ -403,11 +410,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
// We hack this on Darwin by reserving R2. It's probably broken on Linux
// at the moment.
+ bool is64Bit = TM.getSubtargetImpl()->isPPC64();
// We need to store the CR in the low 4-bits of the saved value. First,
// issue a MFCR to save all of the CRBits.
unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
- PPC::R2 : PPC::R0;
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCRpseud), ScratchReg)
+ (is64Bit ? PPC::X2 : PPC::R2) :
+ (is64Bit ? PPC::X0 : PPC::R0);
+ NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::MFCR8pseud :
+ PPC::MFCRpseud), ScratchReg)
.addReg(SrcReg, getKillRegState(isKill)));
// If the saved register wasn't CR0, shift the bits left so that they are
@@ -415,12 +425,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
if (SrcReg != PPC::CR0) {
unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4;
// rlwinm scratch, scratch, ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
+ NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::RLWINM8 :
+ PPC::RLWINM), ScratchReg)
.addReg(ScratchReg).addImm(ShiftBits)
.addImm(0).addImm(31));
}
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(is64Bit ?
+ PPC::STW8 : PPC::STW))
.addReg(ScratchReg,
getKillRegState(isKill)),
FrameIdx));
@@ -504,7 +516,7 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
NewMIs.back()->addMemOperand(MF, MMO);
}
-void
+bool
PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
@@ -524,8 +536,8 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
FrameIdx));
} else {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD),
- PPC::R11), FrameIdx));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::R11));
+ PPC::X11), FrameIdx));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::X11));
}
} else if (PPC::F8RCRegisterClass->hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg),
@@ -534,28 +546,37 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
FrameIdx));
} else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) {
- // FIXME: We need a scatch reg here. The trouble with using R0 is that
- // it's possible for the stack frame to be so big the save location is
- // out of range of immediate offsets, necessitating another register.
- // We hack this on Darwin by reserving R2. It's probably broken on Linux
- // at the moment.
- unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
- PPC::R2 : PPC::R0;
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
- ScratchReg), FrameIdx));
-
- // If the reloaded register isn't CR0, shift the bits right so that they are
- // in the right CR's slot.
- if (DestReg != PPC::CR0) {
- unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
- // rlwinm r11, r11, 32-ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
- .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
- .addImm(31));
+ if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
+ (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_CR), DestReg)
+ , FrameIdx));
+ return true;
+ } else {
+ // FIXME: We need a scratch reg here. The trouble with using R0 is that
+ // it's possible for the stack frame to be so big the save location is
+ // out of range of immediate offsets, necessitating another register.
+ // We hack this on Darwin by reserving R2. It's probably broken on Linux
+ // at the moment.
+ unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
+ PPC::R2 : PPC::R0;
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ ScratchReg), FrameIdx));
+
+ // If the reloaded register isn't CR0, shift the bits right so that they are
+ // in the right CR's slot.
+ if (DestReg != PPC::CR0) {
+ unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
+ // rlwinm r11, r11, 32-ShiftBits, 0, 31.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
+ .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
+ .addImm(31));
+ }
+
+ NewMIs.push_back(BuildMI(MF, DL, get(TM.getSubtargetImpl()->isPPC64() ?
+ PPC::MTCRF8 : PPC::MTCRF), DestReg)
+ .addReg(ScratchReg));
}
-
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg)
- .addReg(ScratchReg));
} else if (PPC::CRBITRCRegisterClass->hasSubClassEq(RC)) {
unsigned Reg = 0;
@@ -600,6 +621,8 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
} else {
llvm_unreachable("Unknown regclass!");
}
+
+ return false;
}
void
@@ -612,7 +635,10 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
SmallVector<MachineInstr*, 4> NewMIs;
DebugLoc DL;
if (MI != MBB.end()) DL = MI->getDebugLoc();
- LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
+ if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs)) {
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setSpillsCR();
+ }
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 90bacc9..e90f8cb 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -72,7 +72,7 @@ class PPCInstrInfo : public PPCGenInstrInfo {
unsigned SrcReg, bool isKill, int FrameIdx,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs) const;
- void LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
+ bool LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs) const;
@@ -88,6 +88,9 @@ public:
ScheduleHazardRecognizer *
CreateTargetHazardRecognizer(const TargetMachine *TM,
const ScheduleDAG *DAG) const;
+ ScheduleHazardRecognizer *
+ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const;
unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 17f63e0..d4c9d10 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -349,7 +349,7 @@ def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
//===----------------------------------------------------------------------===//
// PowerPC Instruction Predicate Definitions.
-def FPContractions : Predicate<"!NoExcessFPPrecision">;
+def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">;
def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">;
def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">;
def IsBookE : Predicate<"PPCSubTarget.isBookE()">;
@@ -399,7 +399,14 @@ let usesCustomInserter = 1, // Expanded after instruction selection.
// SPILL_CR - Indicate that we're dumping the CR register, so we'll need to
// scavenge a register for it.
-def SPILL_CR : Pseudo<(outs), (ins GPRC:$cond, memri:$F),
+let mayStore = 1 in
+def SPILL_CR : Pseudo<(outs), (ins CRRC:$cond, memri:$F),
+ "", []>;
+
+// RESTORE_CR - Indicate that we're restoring the CR register (previously
+// spilled), so we'll need to scavenge a register for it.
+let mayLoad = 1 in
+def RESTORE_CR : Pseudo<(outs CRRC:$cond), (ins memri:$F),
"", []>;
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
@@ -1091,7 +1098,7 @@ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins),
"mfspr $rT, 256", IntGeneral>,
PPC970_DGroup_First, PPC970_Unit_FXU;
-def MTCRF : XFXForm_5<31, 144, (outs), (ins crbitm:$FXM, GPRC:$rS),
+def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS),
"mtcrf $FXM, $rS", BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 3ba9260..27f7f4a 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -46,15 +46,14 @@
#define GET_REGINFO_TARGET_DESC
#include "PPCGenRegisterInfo.inc"
-// FIXME (64-bit): Eventually enable by default.
namespace llvm {
-cl::opt<bool> EnablePPC32RS("enable-ppc32-regscavenger",
+cl::opt<bool> DisablePPC32RS("disable-ppc32-regscavenger",
cl::init(false),
- cl::desc("Enable PPC32 register scavenger"),
+ cl::desc("Disable PPC32 register scavenger"),
cl::Hidden);
-cl::opt<bool> EnablePPC64RS("enable-ppc64-regscavenger",
+cl::opt<bool> DisablePPC64RS("disable-ppc64-regscavenger",
cl::init(false),
- cl::desc("Enable PPC64 register scavenger"),
+ cl::desc("Disable PPC64 register scavenger"),
cl::Hidden);
}
@@ -63,8 +62,8 @@ using namespace llvm;
// FIXME (64-bit): Should be inlined.
bool
PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const {
- return ((EnablePPC32RS && !Subtarget.isPPC64()) ||
- (EnablePPC64RS && Subtarget.isPPC64()));
+ return ((!DisablePPC32RS && !Subtarget.isPPC64()) ||
+ (!DisablePPC64RS && Subtarget.isPPC64()));
}
PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
@@ -120,10 +119,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
PPC::V24, PPC::V25, PPC::V26, PPC::V27,
PPC::V28, PPC::V29, PPC::V30, PPC::V31,
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
-
PPC::LR, 0
};
@@ -149,10 +144,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
PPC::V24, PPC::V25, PPC::V26, PPC::V27,
PPC::V28, PPC::V29, PPC::V30, PPC::V31,
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
-
0
};
// 64-bit Darwin calling convention.
@@ -174,10 +165,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
PPC::V24, PPC::V25, PPC::V26, PPC::V27,
PPC::V28, PPC::V29, PPC::V30, PPC::V31,
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
-
PPC::LR8, 0
};
@@ -203,10 +190,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
PPC::V24, PPC::V25, PPC::V26, PPC::V27,
PPC::V28, PPC::V29, PPC::V30, PPC::V31,
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
-
0
};
@@ -247,9 +230,6 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::R13);
Reserved.set(PPC::R31);
- if (!requiresRegisterScavenging(MF))
- Reserved.set(PPC::R0); // FIXME (64-bit): Remove
-
Reserved.set(PPC::X0);
Reserved.set(PPC::X1);
Reserved.set(PPC::X13);
@@ -259,7 +239,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (Subtarget.isSVR4ABI()) {
Reserved.set(PPC::X2);
}
- // Reserve R2 on Darwin to hack around the problem of save/restore of CR
+ // Reserve X2 on Darwin to hack around the problem of save/restore of CR
// when the stack frame is too big to address directly; we need two regs.
// This is a hack.
if (Subtarget.isDarwinABI()) {
@@ -291,6 +271,8 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case PPC::F4RCRegClassID:
case PPC::VRRCRegClassID:
return 32 - DefaultSafety;
+ case PPC::CRRCRegClassID:
+ return 8 - DefaultSafety;
}
}
@@ -301,7 +283,8 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
void PPCRegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- if (GuaranteedTailCallOpt && I->getOpcode() == PPC::ADJCALLSTACKUP) {
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ I->getOpcode() == PPC::ADJCALLSTACKUP) {
// Add (actually subtract) back the amount the callee popped on return.
if (int CalleeAmt = I->getOperand(1).getImm()) {
bool is64Bit = Subtarget.isPPC64();
@@ -476,28 +459,32 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
unsigned FrameIndex, int SPAdj,
RegScavenger *RS) const {
// Get the instruction.
- MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset>, <FI>
+ MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset>
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc dl = MI.getDebugLoc();
- const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
- unsigned Reg = findScratchRegister(II, RS, RC, SPAdj);
- unsigned SrcReg = MI.getOperand(0).getReg();
+ // FIXME: Once LLVM supports creating virtual registers here, or the register
+ // scavenger can return multiple registers, stop using reserved registers
+ // here.
+ (void) SPAdj;
+ (void) RS;
+
bool LP64 = Subtarget.isPPC64();
+ unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) :
+ (LP64 ? PPC::X0 : PPC::R0);
+ unsigned SrcReg = MI.getOperand(0).getReg();
// We need to store the CR in the low 4-bits of the saved value. First, issue
// an MFCRpsued to save all of the CRBits and, if needed, kill the SrcReg.
- BuildMI(MBB, II, dl, TII.get(PPC::MFCRpseud), Reg)
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFCR8pseud : PPC::MFCRpseud), Reg)
.addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
// If the saved register wasn't CR0, shift the bits left so that they are in
// CR0's slot.
if (SrcReg != PPC::CR0)
// rlwinm rA, rA, ShiftBits, 0, 31.
- BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg)
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
.addReg(Reg, RegState::Kill)
.addImm(getPPCRegisterNumbering(SrcReg) * 4)
.addImm(0)
@@ -511,6 +498,48 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
MBB.erase(II);
}
+/// lowerCRRestore - Expand the RESTORE_CR pseudo at II into real instructions:
+/// load the saved word from FrameIndex into a reserved GPR, rotate it back
+/// into DestReg's slot when DestReg is not CR0, and move it into the condition
+/// register. The pseudo instruction is erased afterwards. SPAdj and RS are
+/// currently unused.
+void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex, int SPAdj,
+ RegScavenger *RS) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; <DestReg> = RESTORE_CR <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ // FIXME: Once LLVM supports creating virtual registers here, or the register
+ // scavenger can return multiple registers, stop using reserved registers
+ // here.
+ (void) SPAdj;
+ (void) RS;
+
+ // Scratch register: R2/X2 on Darwin, R0/X0 otherwise; the X form on PPC64.
+ bool LP64 = Subtarget.isPPC64();
+ unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) :
+ (LP64 ? PPC::X0 : PPC::R0);
+ unsigned DestReg = MI.getOperand(0).getReg();
+ assert(MI.definesRegister(DestReg) &&
+ "RESTORE_CR does not define its destination");
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LWZ8 : PPC::LWZ),
+ Reg), FrameIndex);
+
+ // If the reloaded register isn't CR0, shift the bits right so that they are
+ // in the right CR's slot.
+ if (DestReg != PPC::CR0) {
+ unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
+ // rlwinm rA, rA, 32-ShiftBits, 0, 31.
+ // Reg is an X register when LP64, so pick the 64-bit opcode there, the
+ // same way the load above and the mtcrf below do.
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
+ .addReg(Reg).addImm(32-ShiftBits).addImm(0)
+ .addImm(31);
+ }
+
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTCRF8 : PPC::MTCRF), DestReg)
+ .addReg(Reg);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
void
PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS) const {
@@ -556,16 +585,23 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
return;
}
- // Special case for pseudo-op SPILL_CR.
- if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable by default.
+ // Special case for pseudo-ops SPILL_CR and RESTORE_CR.
+ if (requiresRegisterScavenging(MF)) {
if (OpC == PPC::SPILL_CR) {
lowerCRSpilling(II, FrameIndex, SPAdj, RS);
return;
+ } else if (OpC == PPC::RESTORE_CR) {
+ lowerCRRestore(II, FrameIndex, SPAdj, RS);
+ return;
}
+ }
// Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
+
+ bool is64Bit = Subtarget.isPPC64();
MI.getOperand(FIOperandNo).ChangeToRegister(TFI->hasFP(MF) ?
- PPC::R31 : PPC::R1,
+ (is64Bit ? PPC::X31 : PPC::R31) :
+ (is64Bit ? PPC::X1 : PPC::R1),
false);
// Figure out if the offset in the instruction is shifted right two bits. This
@@ -611,19 +647,19 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// The offset doesn't fit into a single register, scavenge one to build the
// offset in.
- // FIXME: figure out what SPAdj is doing here.
- // FIXME (64-bit): Use "findScratchRegister".
unsigned SReg;
- if (requiresRegisterScavenging(MF))
- SReg = findScratchRegister(II, RS, &PPC::GPRCRegClass, SPAdj);
- else
- SReg = PPC::R0;
+ if (requiresRegisterScavenging(MF)) {
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ SReg = findScratchRegister(II, RS, is64Bit ? G8RC : GPRC, SPAdj);
+ } else
+ SReg = is64Bit ? PPC::X0 : PPC::R0;
// Insert a set of rA with the full offset value before the ld, st, or add
- BuildMI(MBB, II, dl, TII.get(PPC::LIS), SReg)
+ BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SReg)
.addImm(Offset >> 16);
- BuildMI(MBB, II, dl, TII.get(PPC::ORI), SReg)
+ BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), SReg)
.addReg(SReg, RegState::Kill)
.addImm(Offset);
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index f70a594..faf690f 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -57,6 +57,8 @@ public:
int SPAdj, RegScavenger *RS) const;
void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex,
int SPAdj, RegScavenger *RS) const;
+ void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex,
+ int SPAdj, RegScavenger *RS) const;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS = NULL) const;
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 8acf75c..baa0eb5 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "PPCSubtarget.h"
+#include "PPCRegisterInfo.h"
#include "PPC.h"
#include "llvm/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"
@@ -140,3 +141,22 @@ bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV,
return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
GV->hasCommonLinkage() || isDecl;
}
+
+bool PPCSubtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ if (DarwinDirective == PPC::DIR_440) // 440: never enabled; out-params left untouched
+ return false;
+
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+ CriticalPathRCs.clear(); // discard any entries the caller passed in
+
+ if (isPPC64()) // G8RC for PPC64, GPRC otherwise
+ CriticalPathRCs.push_back(&PPC::G8RCRegClass);
+ else
+ CriticalPathRCs.push_back(&PPC::GPRCRegClass);
+
+ return OptLevel >= CodeGenOpt::Default; // enabled from the Default level upwards
+}
+
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index d2b853d..62b2424 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -148,6 +148,10 @@ public:
bool isDarwinABI() const { return isDarwin(); }
bool isSVR4ABI() const { return !isDarwin(); }
+ /// enablePostRAScheduler - True at 'Default' optimization and above (never for DIR_440).
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
};
} // End llvm namespace
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index de8fca0..8e71c46 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -28,10 +28,11 @@ extern "C" void LLVMInitializePowerPCTarget() {
PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS, is64Bit),
DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this),
FrameLowering(Subtarget), JITInfo(*this, is64Bit),
@@ -45,17 +46,19 @@ bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; }
PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : PPCTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {
+ : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
}
PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : PPCTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {
+ : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
}
@@ -81,7 +84,7 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
if (Subtarget.isPPC64())
// Temporary workaround for the inability of PPC64 JIT to handle jump
// tables.
- DisableJumpTables = true;
+ Options.DisableJumpTables = true;
// Inform the subtarget that we are in JIT mode. FIXME: does this break macho
// writing?
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 03b27c6..0427876 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -41,7 +41,7 @@ class PPCTargetMachine : public LLVMTargetMachine {
public:
PPCTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool is64Bit);
@@ -79,7 +79,7 @@ public:
class PPC32TargetMachine : public PPCTargetMachine {
public:
PPC32TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
@@ -89,7 +89,7 @@ public:
class PPC64TargetMachine : public PPCTargetMachine {
public:
PPC64TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
diff --git a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
index f63111f..fdb8a62 100644
--- a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
+++ b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMPowerPCInfo
PowerPCTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMPowerPCInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMPowerPCInfo PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
index f51b417..f77d85b 100644
--- a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = PowerPCInfo
parent = PowerPC
required_libraries = MC Support Target
add_to_library_groups = PowerPC
-
diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt
index 9687951..56ee7c2 100644
--- a/lib/Target/Sparc/CMakeLists.txt
+++ b/lib/Target/Sparc/CMakeLists.txt
@@ -22,17 +22,5 @@ add_llvm_target(SparcCodeGen
SparcSelectionDAGInfo.cpp
)
-add_llvm_library_dependencies(LLVMSparcCodeGen
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSparcDesc
- LLVMSparcInfo
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index dab35e5..9295408 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -100,7 +100,7 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
- if (I->getDesc().hasDelaySlot()) {
+ if (I->hasDelaySlot()) {
MachineBasicBlock::iterator D = MBB.end();
MachineBasicBlock::iterator J = I;
@@ -149,7 +149,7 @@ Filler::findDelayInstr(MachineBasicBlock &MBB,
}
//Call's delay filler can def some of call's uses.
- if (slot->getDesc().isCall())
+ if (slot->isCall())
insertCallUses(slot, RegUses);
else
insertDefsUses(slot, RegDefs, RegUses);
@@ -170,7 +170,7 @@ Filler::findDelayInstr(MachineBasicBlock &MBB,
if (I->hasUnmodeledSideEffects()
|| I->isInlineAsm()
|| I->isLabel()
- || I->getDesc().hasDelaySlot()
+ || I->hasDelaySlot()
|| isDelayFiller(MBB, I))
break;
@@ -194,13 +194,13 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
if (candidate->isImplicitDef() || candidate->isKill())
return true;
- if (candidate->getDesc().mayLoad()) {
+ if (candidate->mayLoad()) {
sawLoad = true;
if (sawStore)
return true;
}
- if (candidate->getDesc().mayStore()) {
+ if (candidate->mayStore()) {
if (sawStore)
return true;
sawStore = true;
@@ -298,13 +298,13 @@ bool Filler::isDelayFiller(MachineBasicBlock &MBB,
return false;
if (candidate->getOpcode() == SP::UNIMP)
return true;
- const MCInstrDesc &prevdesc = (--candidate)->getDesc();
- return prevdesc.hasDelaySlot();
+ --candidate;
+ return candidate->hasDelaySlot();
}
bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize)
{
- if (!I->getDesc().isCall())
+ if (!I->isCall())
return false;
unsigned structSizeOpNum = 0;
diff --git a/lib/Target/Sparc/LLVMBuild.txt b/lib/Target/Sparc/LLVMBuild.txt
index 38c797f..fe20d2f 100644
--- a/lib/Target/Sparc/LLVMBuild.txt
+++ b/lib/Target/Sparc/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = Sparc
@@ -27,4 +30,3 @@ name = SparcCodeGen
parent = Sparc
required_libraries = AsmPrinter CodeGen Core MC SelectionDAG SparcDesc SparcInfo Support Target
add_to_library_groups = Sparc
-
diff --git a/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt b/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
index d3bdf0b..9d4db4d 100644
--- a/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
@@ -3,10 +3,4 @@ add_llvm_library(LLVMSparcDesc
SparcMCAsmInfo.cpp
)
-add_llvm_library_dependencies(LLVMSparcDesc
- LLVMMC
- LLVMSparcInfo
- LLVMSupport
- )
-
add_dependencies(LLVMSparcDesc SparcCommonTableGen)
diff --git a/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt b/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt
index a339cec..97f8f16 100644
--- a/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = SparcDesc
parent = Sparc
required_libraries = MC SparcInfo Support
add_to_library_groups = Sparc
-
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index deb39d9..7548bbf 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -236,9 +236,9 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
// Check if the last terminator is an unconditional branch.
MachineBasicBlock::const_iterator I = Pred->end();
- while (I != Pred->begin() && !(--I)->getDesc().isTerminator())
+ while (I != Pred->begin() && !(--I)->isTerminator())
; // Noop
- return I == Pred->end() || !I->getDesc().isBarrier();
+ return I == Pred->end() || !I->isBarrier();
}
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 25104d1..3608d3b 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -763,7 +763,9 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FMA , MVT::f32, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::ROTL , MVT::i32, Expand);
setOperationAction(ISD::ROTR , MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index 7a6bf50..5290d42 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -133,7 +133,7 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
break;
//Terminator is not a branch
- if (!I->getDesc().isBranch())
+ if (!I->isBranch())
return true;
//Handle Unconditional branches
@@ -195,7 +195,7 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
.addMBB(UnCondBrIter->getOperand(0).getMBB()).addImm(BranchCode);
BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(SP::BA))
.addMBB(TargetBB);
- MBB.addSuccessor(TargetBB);
+
OldInst->eraseFromParent();
UnCondBrIter->eraseFromParent();
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 7dff799..8e16fd7 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -26,10 +26,11 @@ extern "C" void LLVMInitializeSparcTarget() {
///
SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool is64bit)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS, is64bit),
DataLayout(Subtarget.getDataLayout()),
TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget),
@@ -52,16 +53,20 @@ bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM){
SparcV8TargetMachine::SparcV8TargetMachine(const Target &T,
StringRef TT, StringRef CPU,
- StringRef FS, Reloc::Model RM,
+ StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL)
- : SparcTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {
+ : SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
}
SparcV9TargetMachine::SparcV9TargetMachine(const Target &T,
StringRef TT, StringRef CPU,
- StringRef FS, Reloc::Model RM,
+ StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL)
- : SparcTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {
+ : SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
}
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 63bfa5d..cedc1e3 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -34,9 +34,9 @@ class SparcTargetMachine : public LLVMTargetMachine {
SparcFrameLowering FrameLowering;
public:
SparcTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL, bool is64bit);
+ CodeGenOpt::Level OL, bool is64bit);
virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetFrameLowering *getFrameLowering() const {
@@ -65,6 +65,7 @@ class SparcV8TargetMachine : public SparcTargetMachine {
public:
SparcV8TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
@@ -75,6 +76,7 @@ class SparcV9TargetMachine : public SparcTargetMachine {
public:
SparcV9TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
diff --git a/lib/Target/Sparc/TargetInfo/CMakeLists.txt b/lib/Target/Sparc/TargetInfo/CMakeLists.txt
index a076023..b0d031e 100644
--- a/lib/Target/Sparc/TargetInfo/CMakeLists.txt
+++ b/lib/Target/Sparc/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMSparcInfo
SparcTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMSparcInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMSparcInfo SparcCommonTableGen)
diff --git a/lib/Target/Sparc/TargetInfo/LLVMBuild.txt b/lib/Target/Sparc/TargetInfo/LLVMBuild.txt
index 81c9032..b5c320f 100644
--- a/lib/Target/Sparc/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/Sparc/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = SparcInfo
parent = Sparc
required_libraries = MC Support Target
add_to_library_groups = Sparc
-
diff --git a/lib/Target/TargetFrameLowering.cpp b/lib/Target/TargetFrameLowering.cpp
deleted file mode 100644
index 122f869..0000000
--- a/lib/Target/TargetFrameLowering.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-//===----- TargetFrameLowering.cpp - Implement target frame interface ------==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Implements the layout of a stack frame on the target machine.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-
-#include <cstdlib>
-using namespace llvm;
-
-TargetFrameLowering::~TargetFrameLowering() {
-}
-
-/// getFrameIndexOffset - Returns the displacement from the frame register to
-/// the stack frame of the specified index. This is the default implementation
-/// which is overridden for some targets.
-int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return MFI->getObjectOffset(FI) + MFI->getStackSize() -
- getOffsetOfLocalArea() + MFI->getOffsetAdjustment();
-}
-
-int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF,
- int FI, unsigned &FrameReg) const {
- const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo();
-
- // By default, assume all frame indices are referenced via whatever
- // getFrameRegister() says. The target can override this if it's doing
- // something different.
- FrameReg = RI->getFrameRegister(MF);
- return getFrameIndexOffset(MF, FI);
-}
diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp
index d52ecb3..440f9ad 100644
--- a/lib/Target/TargetInstrInfo.cpp
+++ b/lib/Target/TargetInstrInfo.cpp
@@ -13,7 +13,6 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/ErrorHandling.h"
@@ -73,23 +72,6 @@ TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
}
-int
-TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
- SDNode *DefNode, unsigned DefIdx,
- SDNode *UseNode, unsigned UseIdx) const {
- if (!ItinData || ItinData->isEmpty())
- return -1;
-
- if (!DefNode->isMachineOpcode())
- return -1;
-
- unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
- if (!UseNode->isMachineOpcode())
- return ItinData->getOperandCycle(DefClass, DefIdx);
- unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass();
- return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
-}
-
int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const {
@@ -99,17 +81,6 @@ int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
return ItinData->getStageLatency(MI->getDesc().getSchedClass());
}
-int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
- SDNode *N) const {
- if (!ItinData || ItinData->isEmpty())
- return 1;
-
- if (!N->isMachineOpcode())
- return 1;
-
- return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
-}
-
bool TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI,
unsigned DefIdx) const {
@@ -129,19 +100,6 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
}
-bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isTerminator()) return false;
-
- // Conditional branch is a special case.
- if (MCID.isBranch() && !MCID.isBarrier())
- return true;
- if (!MCID.isPredicable())
- return true;
- return !isPredicated(MI);
-}
-
-
/// Measure the specified inline asm to determine an approximation of its
/// length.
/// Comments (which run till the next SeparatorString or newline) do not
diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp
index aa2e014..768facb 100644
--- a/lib/Target/TargetLibraryInfo.cpp
+++ b/lib/Target/TargetLibraryInfo.cpp
@@ -22,15 +22,96 @@ char TargetLibraryInfo::ID = 0;
const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
{
- "memset",
+ "acos",
+ "acosl",
+ "acosf",
+ "asin",
+ "asinl",
+ "asinf",
+ "atan",
+ "atanl",
+ "atanf",
+ "atan2",
+ "atan2l",
+ "atan2f",
+ "ceil",
+ "ceill",
+ "ceilf",
+ "copysign",
+ "copysignf",
+ "copysignl",
+ "cos",
+ "cosl",
+ "cosf",
+ "cosh",
+ "coshl",
+ "coshf",
+ "exp",
+ "expl",
+ "expf",
+ "exp2",
+ "exp2l",
+ "exp2f",
+ "expm1",
+ "expm1l",
+ "expm1f",
+ "fabs",
+ "fabsl",
+ "fabsf",
+ "floor",
+ "floorl",
+ "floorf",
+ "fiprintf",
+ "fmod",
+ "fmodl",
+ "fmodf",
+ "fputs",
+ "fwrite",
+ "iprintf",
+ "log",
+ "logl",
+ "logf",
+ "log2",
+ "log2l",
+ "log2f",
+ "log10",
+ "log10l",
+ "log10f",
+ "log1p",
+ "log1pl",
+ "log1pf",
"memcpy",
"memmove",
+ "memset",
"memset_pattern16",
- "iprintf",
+ "nearbyint",
+ "nearbyintf",
+ "nearbyintl",
+ "pow",
+ "powf",
+ "powl",
+ "rint",
+ "rintf",
+ "rintl",
+ "sin",
+ "sinl",
+ "sinf",
+ "sinh",
+ "sinhl",
+ "sinhf",
"siprintf",
- "fiprintf",
- "fwrite",
- "fputs"
+ "sqrt",
+ "sqrtl",
+ "sqrtf",
+ "tan",
+ "tanl",
+ "tanf",
+ "tanh",
+ "tanhl",
+ "tanhf",
+ "trunc",
+ "truncf",
+ "truncl"
};
/// initialize - Initialize the set of available library functions based on the
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 56b7b69..fc8b67b 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -48,7 +48,7 @@ void TargetLoweringObjectFile::Initialize(MCContext &ctx,
TargetLoweringObjectFile::~TargetLoweringObjectFile() {
}
-static bool isSuitableForBSS(const GlobalVariable *GV) {
+static bool isSuitableForBSS(const GlobalVariable *GV, bool NoZerosInBSS) {
const Constant *C = GV->getInitializer();
// Must have zero initializer.
@@ -133,7 +133,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
// Handle thread-local data first.
if (GVar->isThreadLocal()) {
- if (isSuitableForBSS(GVar))
+ if (isSuitableForBSS(GVar, TM.Options.NoZerosInBSS))
return SectionKind::getThreadBSS();
return SectionKind::getThreadData();
}
@@ -143,7 +143,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
return SectionKind::getCommon();
// Variable can be easily put to BSS section.
- if (isSuitableForBSS(GVar)) {
+ if (isSuitableForBSS(GVar, TM.Options.NoZerosInBSS)) {
if (GVar->hasLocalLinkage())
return SectionKind::getBSSLocal();
else if (GVar->hasExternalLinkage())
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 805e16e..fb7bbbb 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -11,8 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -24,153 +22,11 @@ using namespace llvm;
//
namespace llvm {
- bool LessPreciseFPMADOption;
- bool PrintMachineCode;
- bool NoFramePointerElim;
- bool NoFramePointerElimNonLeaf;
- bool NoExcessFPPrecision;
- bool UnsafeFPMath;
- bool NoInfsFPMath;
- bool NoNaNsFPMath;
- bool HonorSignDependentRoundingFPMathOption;
- bool UseSoftFloat;
- FloatABI::ABIType FloatABIType;
- bool NoImplicitFloat;
- bool NoZerosInBSS;
- bool JITExceptionHandling;
- bool JITEmitDebugInfo;
- bool JITEmitDebugInfoToDisk;
- bool GuaranteedTailCallOpt;
- unsigned StackAlignmentOverride;
- bool RealignStack;
- bool DisableJumpTables;
bool StrongPHIElim;
bool HasDivModLibcall;
bool AsmVerbosityDefault(false);
- bool EnableSegmentedStacks;
}
-static cl::opt<bool, true>
-PrintCode("print-machineinstrs",
- cl::desc("Print generated machine code"),
- cl::location(PrintMachineCode), cl::init(false));
-static cl::opt<bool, true>
-DisableFPElim("disable-fp-elim",
- cl::desc("Disable frame pointer elimination optimization"),
- cl::location(NoFramePointerElim),
- cl::init(false));
-static cl::opt<bool, true>
-DisableFPElimNonLeaf("disable-non-leaf-fp-elim",
- cl::desc("Disable frame pointer elimination optimization for non-leaf funcs"),
- cl::location(NoFramePointerElimNonLeaf),
- cl::init(false));
-static cl::opt<bool, true>
-DisableExcessPrecision("disable-excess-fp-precision",
- cl::desc("Disable optimizations that may increase FP precision"),
- cl::location(NoExcessFPPrecision),
- cl::init(false));
-static cl::opt<bool, true>
-EnableFPMAD("enable-fp-mad",
- cl::desc("Enable less precise MAD instructions to be generated"),
- cl::location(LessPreciseFPMADOption),
- cl::init(false));
-static cl::opt<bool, true>
-EnableUnsafeFPMath("enable-unsafe-fp-math",
- cl::desc("Enable optimizations that may decrease FP precision"),
- cl::location(UnsafeFPMath),
- cl::init(false));
-static cl::opt<bool, true>
-EnableNoInfsFPMath("enable-no-infs-fp-math",
- cl::desc("Enable FP math optimizations that assume no +-Infs"),
- cl::location(NoInfsFPMath),
- cl::init(false));
-static cl::opt<bool, true>
-EnableNoNaNsFPMath("enable-no-nans-fp-math",
- cl::desc("Enable FP math optimizations that assume no NaNs"),
- cl::location(NoNaNsFPMath),
- cl::init(false));
-static cl::opt<bool, true>
-EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math",
- cl::Hidden,
- cl::desc("Force codegen to assume rounding mode can change dynamically"),
- cl::location(HonorSignDependentRoundingFPMathOption),
- cl::init(false));
-static cl::opt<bool, true>
-GenerateSoftFloatCalls("soft-float",
- cl::desc("Generate software floating point library calls"),
- cl::location(UseSoftFloat),
- cl::init(false));
-static cl::opt<llvm::FloatABI::ABIType, true>
-FloatABIForCalls("float-abi",
- cl::desc("Choose float ABI type"),
- cl::location(FloatABIType),
- cl::init(FloatABI::Default),
- cl::values(
- clEnumValN(FloatABI::Default, "default",
- "Target default float ABI type"),
- clEnumValN(FloatABI::Soft, "soft",
- "Soft float ABI (implied by -soft-float)"),
- clEnumValN(FloatABI::Hard, "hard",
- "Hard float ABI (uses FP registers)"),
- clEnumValEnd));
-static cl::opt<bool, true>
-DontPlaceZerosInBSS("nozero-initialized-in-bss",
- cl::desc("Don't place zero-initialized symbols into bss section"),
- cl::location(NoZerosInBSS),
- cl::init(false));
-static cl::opt<bool, true>
-EnableJITExceptionHandling("jit-enable-eh",
- cl::desc("Emit exception handling information"),
- cl::location(JITExceptionHandling),
- cl::init(false));
-// In debug builds, make this default to true.
-#ifdef NDEBUG
-#define EMIT_DEBUG false
-#else
-#define EMIT_DEBUG true
-#endif
-static cl::opt<bool, true>
-EmitJitDebugInfo("jit-emit-debug",
- cl::desc("Emit debug information to debugger"),
- cl::location(JITEmitDebugInfo),
- cl::init(EMIT_DEBUG));
-#undef EMIT_DEBUG
-static cl::opt<bool, true>
-EmitJitDebugInfoToDisk("jit-emit-debug-to-disk",
- cl::Hidden,
- cl::desc("Emit debug info objfiles to disk"),
- cl::location(JITEmitDebugInfoToDisk),
- cl::init(false));
-
-static cl::opt<bool, true>
-EnableGuaranteedTailCallOpt("tailcallopt",
- cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."),
- cl::location(GuaranteedTailCallOpt),
- cl::init(false));
-static cl::opt<unsigned, true>
-OverrideStackAlignment("stack-alignment",
- cl::desc("Override default stack alignment"),
- cl::location(StackAlignmentOverride),
- cl::init(0));
-static cl::opt<bool, true>
-EnableRealignStack("realign-stack",
- cl::desc("Realign stack if needed"),
- cl::location(RealignStack),
- cl::init(true));
-static cl::opt<bool, true>
-DisableSwitchTables(cl::Hidden, "disable-jump-tables",
- cl::desc("Do not generate jump tables."),
- cl::location(DisableJumpTables),
- cl::init(false));
-static cl::opt<bool, true>
-EnableStrongPHIElim(cl::Hidden, "strong-phi-elim",
- cl::desc("Use strong PHI elimination."),
- cl::location(StrongPHIElim),
- cl::init(false));
-static cl::opt<std::string>
-TrapFuncName("trap-func", cl::Hidden,
- cl::desc("Emit a call to trap function rather than a trap instruction"),
- cl::init(""));
static cl::opt<bool>
DataSections("fdata-sections",
cl::desc("Emit data into separate sections"),
@@ -179,18 +35,14 @@ static cl::opt<bool>
FunctionSections("ffunction-sections",
cl::desc("Emit functions into separate sections"),
cl::init(false));
-static cl::opt<bool, true>
-SegmentedStacks("segmented-stacks",
- cl::desc("Use segmented stacks if possible."),
- cl::location(EnableSegmentedStacks),
- cl::init(false));
//---------------------------------------------------------------------------
// TargetMachine Class
//
TargetMachine::TargetMachine(const Target &T,
- StringRef TT, StringRef CPU, StringRef FS)
+ StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options)
: TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS),
CodeGenInfo(0), AsmInfo(0),
MCRelaxAll(false),
@@ -198,11 +50,8 @@ TargetMachine::TargetMachine(const Target &T,
MCSaveTempLabels(false),
MCUseLoc(true),
MCUseCFI(true),
- MCUseDwarfDirectory(false) {
- // Typically it will be subtargets that will adjust FloatABIType from Default
- // to Soft or Hard.
- if (UseSoftFloat)
- FloatABIType = FloatABI::Soft;
+ MCUseDwarfDirectory(false),
+ Options(Options) {
}
TargetMachine::~TargetMachine() {
@@ -258,36 +107,3 @@ void TargetMachine::setDataSections(bool V) {
DataSections = V;
}
-namespace llvm {
- /// DisableFramePointerElim - This returns true if frame pointer elimination
- /// optimization should be disabled for the given machine function.
- bool DisableFramePointerElim(const MachineFunction &MF) {
- // Check to see if we should eliminate non-leaf frame pointers and then
- // check to see if we should eliminate all frame pointers.
- if (NoFramePointerElimNonLeaf && !NoFramePointerElim) {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return MFI->hasCalls();
- }
-
- return NoFramePointerElim;
- }
-
- /// LessPreciseFPMAD - This flag return true when -enable-fp-mad option
- /// is specified on the command line. When this flag is off(default), the
- /// code generator is not allowed to generate mad (multiply add) if the
- /// result is "less precise" than doing those operations individually.
- bool LessPreciseFPMAD() { return UnsafeFPMath || LessPreciseFPMADOption; }
-
- /// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
- /// that the rounding mode of the FPU can change from its default.
- bool HonorSignDependentRoundingFPMath() {
- return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption;
- }
-
- /// getTrapFunctionName - If this returns a non-empty string, this means isel
- /// should lower Intrinsic::trap to a call to the specified function name
- /// instead of an ISD::TRAP node.
- StringRef getTrapFunctionName() {
- return TrapFuncName;
- }
-}
diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp
index 67239b8..2689837 100644
--- a/lib/Target/TargetRegisterInfo.cpp
+++ b/lib/Target/TargetRegisterInfo.cpp
@@ -13,8 +13,6 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/X86/AsmParser/CMakeLists.txt b/lib/Target/X86/AsmParser/CMakeLists.txt
index 94aca7a..47489bb 100644
--- a/lib/Target/X86/AsmParser/CMakeLists.txt
+++ b/lib/Target/X86/AsmParser/CMakeLists.txt
@@ -5,12 +5,4 @@ add_llvm_library(LLVMX86AsmParser
X86AsmParser.cpp
)
-add_llvm_library_dependencies(LLVMX86AsmParser
- LLVMMC
- LLVMMCParser
- LLVMSupport
- LLVMX86Desc
- LLVMX86Info
- )
-
add_dependencies(LLVMX86AsmParser X86CommonTableGen)
diff --git a/lib/Target/X86/AsmParser/LLVMBuild.txt b/lib/Target/X86/AsmParser/LLVMBuild.txt
index 6c2405a..9f94d5d 100644
--- a/lib/Target/X86/AsmParser/LLVMBuild.txt
+++ b/lib/Target/X86/AsmParser/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = X86AsmParser
parent = X86
required_libraries = MC MCParser Support X86Desc X86Info
add_to_library_groups = X86
-
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 4542d4b..be15899 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -51,21 +51,6 @@ endif()
add_llvm_target(X86CodeGen ${sources})
-add_llvm_library_dependencies(LLVMX86CodeGen
- LLVMAnalysis
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- LLVMX86AsmPrinter
- LLVMX86Desc
- LLVMX86Info
- LLVMX86Utils
- )
-
add_subdirectory(AsmParser)
add_subdirectory(Disassembler)
add_subdirectory(InstPrinter)
diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt
index 4f570d5..0cd6db9 100644
--- a/lib/Target/X86/Disassembler/CMakeLists.txt
+++ b/lib/Target/X86/Disassembler/CMakeLists.txt
@@ -5,12 +5,6 @@ add_llvm_library(LLVMX86Disassembler
X86DisassemblerDecoder.c
)
-add_llvm_library_dependencies(LLVMX86Disassembler
- LLVMMC
- LLVMSupport
- LLVMX86Info
- )
-
# workaround for hanging compilation on MSVC9 and 10
if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
set_property(
diff --git a/lib/Target/X86/Disassembler/LLVMBuild.txt b/lib/Target/X86/Disassembler/LLVMBuild.txt
index cd748cf..cac7adf 100644
--- a/lib/Target/X86/Disassembler/LLVMBuild.txt
+++ b/lib/Target/X86/Disassembler/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = X86Disassembler
parent = X86
required_libraries = MC Support X86Info
add_to_library_groups = X86
-
diff --git a/lib/Target/X86/InstPrinter/CMakeLists.txt b/lib/Target/X86/InstPrinter/CMakeLists.txt
index 2a2b5db..28e2460 100644
--- a/lib/Target/X86/InstPrinter/CMakeLists.txt
+++ b/lib/Target/X86/InstPrinter/CMakeLists.txt
@@ -6,10 +6,4 @@ add_llvm_library(LLVMX86AsmPrinter
X86InstComments.cpp
)
-add_llvm_library_dependencies(LLVMX86AsmPrinter
- LLVMMC
- LLVMSupport
- LLVMX86Utils
- )
-
add_dependencies(LLVMX86AsmPrinter X86CommonTableGen)
diff --git a/lib/Target/X86/InstPrinter/LLVMBuild.txt b/lib/Target/X86/InstPrinter/LLVMBuild.txt
index fb01323..6868dde 100644
--- a/lib/Target/X86/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/X86/InstPrinter/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = X86AsmPrinter
parent = X86
required_libraries = MC Support X86Utils
add_to_library_groups = X86
-
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 6e87efa..6e4b1b9 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -106,28 +106,92 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// FALL THROUGH.
case X86::PUNPCKHBWrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKHMask(16, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v16i8, ShuffleMask);
+ break;
+ case X86::VPUNPCKHBWrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHBWrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v16i8, ShuffleMask);
+ break;
+ case X86::VPUNPCKHBWYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHBWYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v32i8, ShuffleMask);
break;
case X86::PUNPCKHWDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKHWDrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKHMask(8, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v8i16, ShuffleMask);
+ break;
+ case X86::VPUNPCKHWDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHWDrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v8i16, ShuffleMask);
+ break;
+ case X86::VPUNPCKHWDYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHWDYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v16i16, ShuffleMask);
break;
case X86::PUNPCKHDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKHDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKHMask(4, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v4i32, ShuffleMask);
+ break;
+ case X86::VPUNPCKHDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v4i32, ShuffleMask);
+ break;
+ case X86::VPUNPCKHDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v8i32, ShuffleMask);
break;
case X86::PUNPCKHQDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKHQDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKHMask(2, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v2i64, ShuffleMask);
+ break;
+ case X86::VPUNPCKHQDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHQDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v2i64, ShuffleMask);
+ break;
+ case X86::VPUNPCKHQDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHQDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v4i64, ShuffleMask);
break;
case X86::PUNPCKLBWrr:
@@ -135,42 +199,117 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// FALL THROUGH.
case X86::PUNPCKLBWrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLBWMask(16, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v16i8, ShuffleMask);
+ break;
+ case X86::VPUNPCKLBWrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLBWrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v16i8, ShuffleMask);
+ break;
+ case X86::VPUNPCKLBWYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLBWYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v32i8, ShuffleMask);
break;
case X86::PUNPCKLWDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKLWDrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLWDMask(8, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v8i16, ShuffleMask);
+ break;
+ case X86::VPUNPCKLWDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLWDrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v8i16, ShuffleMask);
+ break;
+ case X86::VPUNPCKLWDYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLWDYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v16i16, ShuffleMask);
break;
case X86::PUNPCKLDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKLDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLDQMask(4, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v4i32, ShuffleMask);
+ break;
+ case X86::VPUNPCKLDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v4i32, ShuffleMask);
+ break;
+ case X86::VPUNPCKLDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v8i32, ShuffleMask);
break;
case X86::PUNPCKLQDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKLQDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLQDQMask(2, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v2i64, ShuffleMask);
+ break;
+ case X86::VPUNPCKLQDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLQDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v2i64, ShuffleMask);
+ break;
+ case X86::VPUNPCKLQDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLQDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v4i64, ShuffleMask);
break;
case X86::SHUFPDrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::SHUFPDrmi:
- DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask);
+ DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VSHUFPDrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VSHUFPDrmi:
- DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask);
+ DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VSHUFPDYrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VSHUFPDYrmi:
+ DecodeSHUFPMask(MVT::v4f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -179,14 +318,25 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::SHUFPSrmi:
- DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask);
+ DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VSHUFPSrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VSHUFPSrmi:
- DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask);
+ DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VSHUFPSYrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VSHUFPSYrmi:
+ DecodeSHUFPMask(MVT::v8f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -195,14 +345,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKLPDrm:
- DecodeUNPCKLPMask(MVT::v2f64, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKLPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPDrm:
- DecodeUNPCKLPMask(MVT::v2f64, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -210,7 +360,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPDYrm:
- DecodeUNPCKLPMask(MVT::v4f64, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v4f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -218,14 +368,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKLPSrm:
- DecodeUNPCKLPMask(MVT::v4f32, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v4f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKLPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPSrm:
- DecodeUNPCKLPMask(MVT::v4f32, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v4f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -233,7 +383,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPSYrm:
- DecodeUNPCKLPMask(MVT::v8f32, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v8f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -241,14 +391,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKHPDrm:
- DecodeUNPCKHPMask(MVT::v2f64, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKHPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKHPDrm:
- DecodeUNPCKHPMask(MVT::v2f64, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -256,7 +406,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKHPDYrm:
- DecodeUNPCKLPMask(MVT::v4f64, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v4f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -264,14 +414,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKHPSrm:
- DecodeUNPCKHPMask(MVT::v4f32, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v4f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKHPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKHPSrm:
- DecodeUNPCKHPMask(MVT::v4f32, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v4f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -279,34 +429,52 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKHPSYrm:
- DecodeUNPCKHPMask(MVT::v8f32, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v8f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERMILPSri:
- DecodeVPERMILPSMask(4, MI->getOperand(2).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPSmi:
+ DecodeVPERMILPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERMILPSYri:
- DecodeVPERMILPSMask(8, MI->getOperand(2).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPSYmi:
+ DecodeVPERMILPMask(MVT::v8f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERMILPDri:
- DecodeVPERMILPDMask(2, MI->getOperand(2).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPDmi:
+ DecodeVPERMILPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERMILPDYri:
- DecodeVPERMILPDMask(4, MI->getOperand(2).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPDYmi:
+ DecodeVPERMILPMask(MVT::v4f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERM2F128rr:
- DecodeVPERM2F128Mask(MI->getOperand(3).getImm(), ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
+ case X86::VPERM2I128rr:
Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPERM2F128rm:
+ case X86::VPERM2I128rm:
+ DecodeVPERM2F128Mask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
}
diff --git a/lib/Target/X86/LLVMBuild.txt b/lib/Target/X86/LLVMBuild.txt
index 514566c..87305e0 100644
--- a/lib/Target/X86/LLVMBuild.txt
+++ b/lib/Target/X86/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo Utils
+
[component_0]
type = TargetGroup
name = X86
@@ -30,4 +33,3 @@ name = X86CodeGen
parent = X86
required_libraries = Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target X86AsmPrinter X86Desc X86Info X86Utils
add_to_library_groups = X86
-
diff --git a/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/lib/Target/X86/MCTargetDesc/CMakeLists.txt
index 264e791..ab2ebb4 100644
--- a/lib/Target/X86/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/X86/MCTargetDesc/CMakeLists.txt
@@ -6,13 +6,6 @@ add_llvm_library(LLVMX86Desc
X86MachObjectWriter.cpp
)
-add_llvm_library_dependencies(LLVMX86Desc
- LLVMMC
- LLVMSupport
- LLVMX86AsmPrinter
- LLVMX86Info
- )
-
add_dependencies(LLVMX86Desc X86CommonTableGen)
# Hack: we need to include 'main' target directory to grab private headers
diff --git a/lib/Target/X86/MCTargetDesc/LLVMBuild.txt b/lib/Target/X86/MCTargetDesc/LLVMBuild.txt
index 3d09301..9e1d29c 100644
--- a/lib/Target/X86/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/X86/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = X86Desc
parent = X86
required_libraries = MC Support X86AsmPrinter X86Info
add_to_library_groups = X86
-
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 69ad7d7..87b2b05 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -107,6 +107,11 @@ public:
bool MayNeedRelaxation(const MCInst &Inst) const;
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const;
+
void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
@@ -244,6 +249,14 @@ bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
return hasExp && !hasRIP;
}
+bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // Relax if the value is too big for a (signed) i8.
+ return int64_t(Value) != int64_t(int8_t(Value));
+}
+
// FIXME: Can tblgen help at all here to verify there aren't other instructions
// we can relax?
void X86AsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index c50f785..662ac1d 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -304,6 +304,12 @@ namespace X86II {
// TAXD - Prefix before and after 0x0F. Combination of TA and XD.
TAXD = 19 << Op0Shift,
+ // XOP8 - Prefix to include use of imm byte.
+ XOP8 = 20 << Op0Shift,
+
+ // XOP9 - Prefix to exclude use of imm byte.
+ XOP9 = 21 << Op0Shift,
+
//===------------------------------------------------------------------===//
// REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
// They are used to specify GPRs and SSE registers, 64-bit operand size,
@@ -418,7 +424,16 @@ namespace X86II {
/// storing a classifier in the imm8 field. To simplify our implementation,
/// we handle this by storeing the classifier in the opcode field and using
/// this flag to indicate that the encoder should do the wacky 3DNow! thing.
- Has3DNow0F0FOpcode = 1U << 7
+ Has3DNow0F0FOpcode = 1U << 7,
+
+ /// XOP_W - Same bit as VEX_W. Used to indicate swapping of
+ /// operand 3 and 4 to be encoded in ModRM or I8IMM. This is used
+ /// for FMA4 and XOP instructions.
+ XOP_W = 1U << 8,
+
+ /// XOP - Opcode prefix used by XOP instructions.
+ XOP = 1U << 9
+
};
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
@@ -488,9 +503,12 @@ namespace X86II {
return 0;
case X86II::MRMSrcMem: {
bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ bool HasXOP_W = (TSFlags >> X86II::VEXShift) & X86II::XOP_W;
unsigned FirstMemOp = 1;
if (HasVEX_4V)
++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
+ if (HasXOP_W)
+ ++FirstMemOp;// Skip the register source (which is encoded in I8IMM).
// FIXME: Maybe lea should have its own form? This is a horrible hack.
//if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 2703100..eb64ad1 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -125,7 +125,19 @@ getNonexecutableStackSection(MCContext &Ctx) const {
0, SectionKind::getMetadata());
}
-X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) {
+X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
+ if (Triple.getArch() == Triple::x86_64) {
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+ }
+
+ AsmTransCBE = x86_asm_table;
+ AssemblerDialect = AsmWriterFlavor;
+
+ TextAlignFillValue = 0x90;
+}
+
+X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
if (Triple.getArch() == Triple::x86_64) {
GlobalPrefix = "";
PrivateGlobalPrefix = ".L";
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
index 2cd4c8e..5d619e8 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
@@ -38,8 +38,12 @@ namespace llvm {
virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const;
};
- struct X86MCAsmInfoCOFF : public MCAsmInfoCOFF {
- explicit X86MCAsmInfoCOFF(const Triple &Triple);
+ struct X86MCAsmInfoMicrosoft : public MCAsmInfoMicrosoft {
+ explicit X86MCAsmInfoMicrosoft(const Triple &Triple);
+ };
+
+ struct X86MCAsmInfoGNUCOFF : public MCAsmInfoGNUCOFF {
+ explicit X86MCAsmInfoGNUCOFF(const Triple &Triple);
};
} // namespace llvm
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 1ab469c..8e14cb1 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -169,23 +169,36 @@ static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
return false;
}
-/// StartsWithGlobalOffsetTable - Return true for the simple cases where this
-/// expression starts with _GLOBAL_OFFSET_TABLE_. This is a needed to support
-/// PIC on ELF i386 as that symbol is magic. We check only simple case that
+/// StartsWithGlobalOffsetTable - Check if this expression starts with
+/// _GLOBAL_OFFSET_TABLE_ and if it is of the form
+/// _GLOBAL_OFFSET_TABLE_-symbol. This is needed to support PIC on ELF
+/// i386 as _GLOBAL_OFFSET_TABLE_ is magical. We check only simple case that
/// are know to be used: _GLOBAL_OFFSET_TABLE_ by itself or at the start
/// of a binary expression.
-static bool StartsWithGlobalOffsetTable(const MCExpr *Expr) {
+enum GlobalOffsetTableExprKind {
+ GOT_None,
+ GOT_Normal,
+ GOT_SymDiff
+};
+static GlobalOffsetTableExprKind
+StartsWithGlobalOffsetTable(const MCExpr *Expr) {
+ const MCExpr *RHS = 0;
if (Expr->getKind() == MCExpr::Binary) {
const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Expr);
Expr = BE->getLHS();
+ RHS = BE->getRHS();
}
if (Expr->getKind() != MCExpr::SymbolRef)
- return false;
+ return GOT_None;
const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
const MCSymbol &S = Ref->getSymbol();
- return S.getName() == "_GLOBAL_OFFSET_TABLE_";
+ if (S.getName() != "_GLOBAL_OFFSET_TABLE_")
+ return GOT_None;
+ if (RHS && RHS->getKind() == MCExpr::SymbolRef)
+ return GOT_SymDiff;
+ return GOT_Normal;
}
void X86MCCodeEmitter::
@@ -209,12 +222,15 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
// If we have an immoffset, add it to the expression.
if ((FixupKind == FK_Data_4 ||
- FixupKind == MCFixupKind(X86::reloc_signed_4byte)) &&
- StartsWithGlobalOffsetTable(Expr)) {
- assert(ImmOffset == 0);
-
- FixupKind = MCFixupKind(X86::reloc_global_offset_table);
- ImmOffset = CurByte;
+ FixupKind == MCFixupKind(X86::reloc_signed_4byte))) {
+ GlobalOffsetTableExprKind Kind = StartsWithGlobalOffsetTable(Expr);
+ if (Kind != GOT_None) {
+ assert(ImmOffset == 0);
+
+ FixupKind = MCFixupKind(X86::reloc_global_offset_table);
+ if (Kind == GOT_Normal)
+ ImmOffset = CurByte;
+ }
}
// If the fixup is pc-relative, we need to bias the value to be relative to
@@ -415,6 +431,13 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// opcode extension, or ignored, depending on the opcode byte)
unsigned char VEX_W = 0;
+ // XOP_W: opcode specific, same bit as VEX_W, but used to
+ // swap operand 3 and 4 for FMA4 and XOP instructions
+ unsigned char XOP_W = 0;
+
+ // XOP: Use XOP prefix byte 0x8f instead of VEX.
+ unsigned char XOP = 0;
+
// VEX_5M (VEX m-mmmmm field):
//
// 0b00000: Reserved for future use
@@ -422,7 +445,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// 0b00010: implied 0F 38 leading opcode bytes
// 0b00011: implied 0F 3A leading opcode bytes
// 0b00100-0b11111: Reserved for future use
- //
+ // 0b01000: XOP map select - 08h instructions with imm byte
+ // 0b01001: XOP map select - 09h instructions with no imm byte
unsigned char VEX_5M = 0x1;
// VEX_4V (VEX vvvv field): a register specifier
@@ -453,6 +477,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W)
VEX_W = 1;
+ if ((TSFlags >> X86II::VEXShift) & X86II::XOP_W)
+ XOP_W = 1;
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::XOP)
+ XOP = 1;
+
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
VEX_L = 1;
@@ -482,6 +512,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
case X86II::XD: // F2 0F
VEX_PP = 0x3;
break;
+ case X86II::XOP8:
+ VEX_5M = 0x8;
+ break;
+ case X86II::XOP9:
+ VEX_5M = 0x9;
+ break;
case X86II::A6: // Bypass: Not used by VEX
case X86II::A7: // Bypass: Not used by VEX
case X86II::TB: // Bypass: Not used by VEX
@@ -489,6 +525,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
break; // No prefix!
}
+
// Set the vector length to 256-bit if YMM0-YMM15 is used
for (unsigned i = 0; i != MI.getNumOperands(); ++i) {
if (!MI.getOperand(i).isReg())
@@ -529,6 +566,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// src1(ModR/M), MemAddr, imm8
// src1(ModR/M), MemAddr, src2(VEX_I8IMM)
//
+ // FMA4:
+ // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+ // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M),
if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
VEX_R = 0x0;
@@ -620,16 +660,16 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
//
unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
- if (VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) { // 2 byte VEX prefix
+ if (VEX_B && VEX_X && !VEX_W && !XOP && (VEX_5M == 1)) { // 2 byte VEX prefix
EmitByte(0xC5, CurByte, OS);
EmitByte(LastByte | (VEX_R << 7), CurByte, OS);
return;
}
// 3 byte VEX prefix
- EmitByte(0xC4, CurByte, OS);
+ EmitByte(XOP ? 0x8F : 0xC4, CurByte, OS);
EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS);
- EmitByte(LastByte | (VEX_W << 7), CurByte, OS);
+ EmitByte(LastByte | ((VEX_W | XOP_W) << 7), CurByte, OS);
}
/// DetermineREXPrefix - Determine if the MCInst has to be encoded with a X86-64
@@ -889,6 +929,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// It uses the VEX.VVVV field?
bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+ bool HasXOP_W = (TSFlags >> X86II::VEXShift) & X86II::XOP_W;
+ unsigned XOP_W_I8IMMOperand = 2;
// Determine where the memory operand starts, if present.
int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode);
@@ -961,9 +1003,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
SrcRegNum++;
+ if(HasXOP_W) // Skip 2nd src (which is encoded in I8IMM)
+ SrcRegNum++;
+
EmitRegModRMByte(MI.getOperand(SrcRegNum),
GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
- CurOp = SrcRegNum + 1;
+
+ // 2 operands skipped with HasXOP_W, compensate accordingly
+ CurOp = HasXOP_W ? SrcRegNum : SrcRegNum + 1;
if (HasVEX_4VOp3)
++CurOp;
break;
@@ -975,6 +1022,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
++AddrOperands;
++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
}
+ if(HasXOP_W) // Skip second register source (encoded in I8IMM)
+ ++FirstMemOp;
EmitByte(BaseOpcode, CurByte, OS);
@@ -1062,12 +1111,24 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// according to the right size for the instruction.
if (CurOp != NumOps) {
// The last source register of a 4 operand instruction in AVX is encoded
- // in bits[7:4] of a immediate byte, and bits[3:0] are ignored.
+ // in bits[7:4] of an immediate byte.
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
- const MCOperand &MO = MI.getOperand(CurOp++);
+ const MCOperand &MO = MI.getOperand(HasXOP_W ? XOP_W_I8IMMOperand
+ : CurOp);
+ CurOp++;
bool IsExtReg = X86II::isX86_64ExtendedReg(MO.getReg());
unsigned RegNum = (IsExtReg ? (1 << 7) : 0);
RegNum |= GetX86RegNum(MO) << 4;
+ // If there is an additional 5th operand it must be an immediate, which
+ // is encoded in bits[3:0]
+ if(CurOp != NumOps) {
+ const MCOperand &MIMM = MI.getOperand(CurOp++);
+ if(MIMM.isImm()) {
+ unsigned Val = MIMM.getImm();
+ assert(Val < 16 && "Immediate operand value out of range");
+ RegNum |= Val;
+ }
+ }
EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS,
Fixups);
} else {
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index a843515..f2a34ed 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -361,8 +361,10 @@ static MCAsmInfo *createX86MCAsmInfo(const Target &T, StringRef TT) {
MAI = new X86_64MCAsmInfoDarwin(TheTriple);
else
MAI = new X86MCAsmInfoDarwin(TheTriple);
- } else if (TheTriple.isOSWindows()) {
- MAI = new X86MCAsmInfoCOFF(TheTriple);
+ } else if (TheTriple.getOS() == Triple::Win32) {
+ MAI = new X86MCAsmInfoMicrosoft(TheTriple);
+ } else if (TheTriple.getOS() == Triple::MinGW32 || TheTriple.getOS() == Triple::Cygwin) {
+ MAI = new X86MCAsmInfoGNUCOFF(TheTriple);
} else {
MAI = new X86ELFMCAsmInfo(TheTriple);
}
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index 7d901af..a581993 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -922,16 +922,3 @@ _test2: ## @test2
The insertps's of $0 are pointless complex copies.
//===---------------------------------------------------------------------===//
-
-If SSE4.1 is available we should inline rounding functions instead of emitting
-a libcall.
-
-floor: roundsd $0x01, %xmm, %xmm
-ceil: roundsd $0x02, %xmm, %xmm
-
-and likewise for the single precision versions.
-
-Currently, SelectionDAGBuilder doesn't turn calls to these functions into the
-corresponding nodes and some targets (including X86) aren't ready for them.
-
-//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/TargetInfo/CMakeLists.txt b/lib/Target/X86/TargetInfo/CMakeLists.txt
index 4da00fa..b1d0b9f 100644
--- a/lib/Target/X86/TargetInfo/CMakeLists.txt
+++ b/lib/Target/X86/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMX86Info
X86TargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMX86Info
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMX86Info X86CommonTableGen)
diff --git a/lib/Target/X86/TargetInfo/LLVMBuild.txt b/lib/Target/X86/TargetInfo/LLVMBuild.txt
index ee015bd..3c64a22 100644
--- a/lib/Target/X86/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/X86/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = X86Info
parent = X86
required_libraries = MC Support Target
add_to_library_groups = X86
-
diff --git a/lib/Target/X86/Utils/CMakeLists.txt b/lib/Target/X86/Utils/CMakeLists.txt
index caffd8b..2e72c34 100644
--- a/lib/Target/X86/Utils/CMakeLists.txt
+++ b/lib/Target/X86/Utils/CMakeLists.txt
@@ -4,9 +4,4 @@ add_llvm_library(LLVMX86Utils
X86ShuffleDecode.cpp
)
-add_llvm_library_dependencies(LLVMX86Utils
- LLVMCore
- LLVMSupport
- )
-
add_dependencies(LLVMX86Utils X86CommonTableGen)
diff --git a/lib/Target/X86/Utils/LLVMBuild.txt b/lib/Target/X86/Utils/LLVMBuild.txt
index 3ee441e..de0a30f 100644
--- a/lib/Target/X86/Utils/LLVMBuild.txt
+++ b/lib/Target/X86/Utils/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = X86Utils
parent = X86
required_libraries = Core Support
add_to_library_groups = X86
-
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index f6c9d7b..e7631b6 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -95,54 +95,31 @@ void DecodePSHUFLWMask(unsigned Imm,
ShuffleMask.push_back(7);
}
-void DecodePUNPCKLBWMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i8, NElts), ShuffleMask);
-}
-
-void DecodePUNPCKLWDMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i16, NElts), ShuffleMask);
-}
-
-void DecodePUNPCKLDQMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
-}
-
-void DecodePUNPCKLQDQMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
-}
-
-void DecodePUNPCKLMask(EVT VT,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(VT, ShuffleMask);
-}
+void DecodeSHUFPMask(EVT VT, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
-void DecodePUNPCKHMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(i+NElts/2);
- ShuffleMask.push_back(i+NElts+NElts/2);
- }
-}
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
-void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- // Part that reads from dest.
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(Imm % NElts);
- Imm /= NElts;
- }
- // Part that reads from src.
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(Imm % NElts + NElts);
- Imm /= NElts;
+ int NewImm = Imm;
+ for (unsigned l = 0; l < NumLanes; ++l) {
+ unsigned LaneStart = l * NumLaneElts;
+ // Part that reads from dest.
+ for (unsigned i = 0; i != NumLaneElts/2; ++i) {
+ ShuffleMask.push_back(NewImm % NumLaneElts + LaneStart);
+ NewImm /= NumLaneElts;
+ }
+ // Part that reads from src.
+ for (unsigned i = 0; i != NumLaneElts/2; ++i) {
+ ShuffleMask.push_back(NewImm % NumLaneElts + NumElts + LaneStart);
+ NewImm /= NumLaneElts;
+ }
+ if (NumLaneElts == 4) NewImm = Imm; // reload imm
}
}
-void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -161,10 +138,10 @@ void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
}
}
-/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
/// etc. VT indicates the type of the vector allowing it to handle different
/// datatypes and vector widths.
-void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -183,36 +160,23 @@ void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
}
}
-// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit
-// elements. For 256-bit vectors, it's considered as two 128 lanes, the
-// referenced elements can't cross lanes and the mask of the first lane must
-// be the same of the second.
-void DecodeVPERMILPSMask(unsigned NumElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- unsigned NumLanes = (NumElts*32)/128;
- unsigned LaneSize = NumElts/NumLanes;
-
- for (unsigned l = 0; l != NumLanes; ++l) {
- for (unsigned i = 0; i != LaneSize; ++i) {
- unsigned Idx = (Imm >> (i*2)) & 0x3 ;
- ShuffleMask.push_back(Idx+(l*LaneSize));
- }
- }
-}
+// DecodeVPERMILPMask - Decodes VPERMILPS/VPERMILPD permutes of 32-bit or
+// 64-bit elements. A 256-bit vector is treated as two 128-bit lanes, and
+// referenced elements can't cross lanes. For VPERMILPS, the mask of the first
+// lane is also used for the second; for VPERMILPD, each lane has its own bits.
+void DecodeVPERMILPMask(EVT VT, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
-// DecodeVPERMILPDMask - Decodes VPERMILPD permutes for any 128-bit 64-bit
-// elements. For 256-bit vectors, it's considered as two 128 lanes, the
-// referenced elements can't cross lanes but the mask of the first lane can
-// be the different of the second (not like VPERMILPS).
-void DecodeVPERMILPDMask(unsigned NumElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- unsigned NumLanes = (NumElts*64)/128;
- unsigned LaneSize = NumElts/NumLanes;
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
- for (unsigned l = 0; l < NumLanes; ++l) {
- for (unsigned i = l*LaneSize; i < LaneSize*(l+1); ++i) {
- unsigned Idx = (Imm >> i) & 0x1;
- ShuffleMask.push_back(Idx+(l*LaneSize));
+ for (unsigned l = 0; l != NumLanes; ++l) {
+ unsigned LaneStart = l*NumLaneElts;
+ for (unsigned i = 0; i != NumLaneElts; ++i) {
+ unsigned Idx = NumLaneElts == 4 ? (Imm >> (i*2)) & 0x3
+ : (Imm >> (i+LaneStart)) & 0x1;
+ ShuffleMask.push_back(Idx+LaneStart);
}
}
}
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 35f6530..243728f 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -46,50 +46,25 @@ void DecodePSHUFHWMask(unsigned Imm,
void DecodePSHUFLWMask(unsigned Imm,
SmallVectorImpl<unsigned> &ShuffleMask);
-void DecodePUNPCKLBWMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodePUNPCKLWDMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodePUNPCKLDQMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodePUNPCKLQDQMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodePUNPCKLMask(EVT VT,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodePUNPCKHMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeSHUFPMask(EVT VT, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
-/// DecodeUNPCKHPMask - This decodes the shuffle masks for unpckhps/unpckhpd
+/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd
/// etc. VT indicates the type of the vector allowing it to handle different
/// datatypes and vector widths.
-void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
-/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
/// etc. VT indicates the type of the vector allowing it to handle different
/// datatypes and vector widths.
-void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
-
+void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
-// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit
-// elements. For 256-bit vectors, it's considered as two 128 lanes, the
-// referenced elements can't cross lanes and the mask of the first lane must
-// be the same of the second.
-void DecodeVPERMILPSMask(unsigned NElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
-// DecodeVPERMILPDMask - Decodes VPERMILPD permutes for any 128-bit 64-bit
-// elements. For 256-bit vectors, it's considered as two 128 lanes, the
-// referenced elements can't cross lanes but the mask of the first lane can
-// be the different of the second (not like VPERMILPS).
-void DecodeVPERMILPDMask(unsigned NElts, unsigned Imm,
+// DecodeVPERMILPMask - Decodes VPERMILPS/VPERMILPD permutes for any 128-bit
+// vector of 32-bit or 64-bit elements. A 256-bit vector is treated as two
+// 128-bit lanes. For VPERMILPS, referenced elements can't cross lanes and the
+// mask of the first lane must be the same as that of the second.
+void DecodeVPERMILPMask(EVT VT, unsigned Imm,
SmallVectorImpl<unsigned> &ShuffleMask);
void DecodeVPERM2F128Mask(unsigned Imm,
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 62a7016..8229ca5 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -91,6 +91,8 @@ def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true",
"Enable three-operand fused multiple-add">;
def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
"Enable four-operand fused multiple-add">;
+def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
+ "Enable XOP instructions">;
def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
"HasVectorUAMem", "true",
"Allow unaligned memory operands on vector/SIMD instructions">;
@@ -194,14 +196,16 @@ def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
def : Proc<"amdfam10", [FeatureSSE3, FeatureSSE4A,
- Feature3DNowA, FeatureCMPXCHG16B,
+ Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT,
FeatureSlowBTMem]>;
-def : Proc<"barcelona", [FeatureSSE3, FeatureSSE4A,
- Feature3DNowA, FeatureCMPXCHG16B,
- FeatureSlowBTMem]>;
-def : Proc<"istanbul", [Feature3DNowA, FeatureCMPXCHG16B,
- FeatureSSE4A]>;
-def : Proc<"shanghai", [Feature3DNowA, FeatureCMPXCHG16B, FeatureSSE4A]>;
+// FIXME: Disabling AVX for now since it's not ready.
+def : Proc<"bdver1", [FeatureSSE42, FeatureSSE4A, FeatureCMPXCHG16B,
+ FeatureAES, FeatureCLMUL, FeatureFMA4,
+ FeatureXOP, FeatureLZCNT]>;
+def : Proc<"bdver2", [FeatureSSE42, FeatureSSE4A, FeatureCMPXCHG16B,
+ FeatureAES, FeatureCLMUL, FeatureFMA4,
+ FeatureXOP, FeatureF16C, FeatureLZCNT,
+ FeatureBMI]>;
def : Proc<"winchip-c6", [FeatureMMX]>;
def : Proc<"winchip2", [Feature3DNow]>;
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 77b9905..aab2a05 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -158,10 +158,15 @@ def CC_X86_64_C : CallingConv<[
CCIfSubtarget<"hasXMM()",
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
- // The first 8 256-bit vector arguments are passed in YMM registers.
- CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
- CCIfSubtarget<"hasAVX()",
- CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7]>>>,
+ // The first 8 256-bit vector arguments are passed in YMM registers, unless
+ // this is a vararg function.
+ // FIXME: This isn't precisely correct; the x86-64 ABI document says that
+ // fixed arguments to vararg functions are supposed to be passed in
+ // registers. Actually modeling that would be a lot of work, though.
+ CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasAVX()",
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3,
+ YMM4, YMM5, YMM6, YMM7]>>>>,
// Integer/FP values get stored in stack slots that are 8 bytes in size and
// 8-byte aligned if there are no more registers to hold them.
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index ba615a8..ed16e88 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -1004,7 +1004,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
break;
}
- if (!Desc->isVariadic() && CurOp != NumOps) {
+ if (!MI.isVariadic() && CurOp != NumOps) {
#ifndef NDEBUG
dbgs() << "Cannot encode all operands of: " << MI << "\n";
#endif
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 32f1770..1589439 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -728,7 +728,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
// fastcc with -tailcallopt is intended to provide a guaranteed
// tail call optimization. Fastisel doesn't know how to do that.
- if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
+ if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
return false;
// Let SDISel handle vararg functions.
@@ -1529,7 +1529,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
// fastcc with -tailcallopt is intended to provide a guaranteed
// tail call optimization. Fastisel doesn't know how to do that.
- if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
+ if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
return false;
PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
@@ -1543,7 +1543,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
// Fast-isel doesn't know about callee-pop yet.
if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg,
- GuaranteedTailCallOpt))
+ TM.Options.GuaranteedTailCallOpt))
return false;
// Check whether the function can return without sret-demotion.
@@ -2121,7 +2121,7 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
default: return false;
case MVT::f32:
if (X86ScalarSSEf32) {
- Opc = Subtarget->hasAVX() ? X86::VFsFLD0SS : X86::FsFLD0SS;
+ Opc = X86::FsFLD0SS;
RC = X86::FR32RegisterClass;
} else {
Opc = X86::LD_Fp032;
@@ -2130,7 +2130,7 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
break;
case MVT::f64:
if (X86ScalarSSEf64) {
- Opc = Subtarget->hasAVX() ? X86::VFsFLD0SD : X86::FsFLD0SD;
+ Opc = X86::FsFLD0SD;
RC = X86::FR64RegisterClass;
} else {
Opc = X86::LD_Fp064;
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 819d242..6a40cc1 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -47,7 +47,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineModuleInfo &MMI = MF.getMMI();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
- return (DisableFramePointerElim(MF) ||
+ return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
RI->needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken() ||
@@ -210,7 +210,7 @@ static
void mergeSPUpdatesDown(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
unsigned StackPtr, uint64_t *NumBytes = NULL) {
- // FIXME: THIS ISN'T RUN!!!
+ // FIXME: THIS ISN'T RUN!!!
return;
if (MBBI == MBB.end()) return;
@@ -351,20 +351,22 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
/// register. The number corresponds to the enum lists in
/// compact_unwind_encoding.h.
static int getCompactUnwindRegNum(const unsigned *CURegs, unsigned Reg) {
- int Idx = 1;
- for (; *CURegs; ++CURegs, ++Idx)
+ for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
if (*CURegs == Reg)
return Idx;
return -1;
}
+// Number of registers that can be saved in a compact unwind encoding.
+#define CU_NUM_SAVED_REGS 6
+
/// encodeCompactUnwindRegistersWithoutFrame - Create the permutation encoding
/// used with frameless stacks. It is passed the number of registers to be saved
/// and an array of the registers saved.
-static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6],
- unsigned RegCount,
- bool Is64Bit) {
+static uint32_t
+encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
+ unsigned RegCount, bool Is64Bit) {
// The saved registers are numbered from 1 to 6. In order to encode the order
// in which they were saved, we re-number them according to their place in the
// register order. The re-numbering is relative to the last re-numbered
@@ -385,14 +387,21 @@ static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6],
};
const unsigned *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs);
- uint32_t RenumRegs[6];
- for (unsigned i = 6 - RegCount; i < 6; ++i) {
+ for (unsigned i = 0; i != CU_NUM_SAVED_REGS; ++i) {
int CUReg = getCompactUnwindRegNum(CURegs, SavedRegs[i]);
if (CUReg == -1) return ~0U;
SavedRegs[i] = CUReg;
+ }
+
+ // Reverse the list.
+ std::swap(SavedRegs[0], SavedRegs[5]);
+ std::swap(SavedRegs[1], SavedRegs[4]);
+ std::swap(SavedRegs[2], SavedRegs[3]);
+ uint32_t RenumRegs[CU_NUM_SAVED_REGS];
+ for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i) {
unsigned Countless = 0;
- for (unsigned j = 6 - RegCount; j < i; ++j)
+ for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
if (SavedRegs[j] < SavedRegs[i])
++Countless;
@@ -435,8 +444,9 @@ static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6],
/// encodeCompactUnwindRegistersWithFrame - Return the registers encoded for a
/// compact encoding with a frame pointer.
-static uint32_t encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[6],
- bool Is64Bit) {
+static uint32_t
+encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
+ bool Is64Bit) {
static const unsigned CU32BitRegs[] = {
X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
};
@@ -448,13 +458,16 @@ static uint32_t encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[6],
// Encode the registers in the order they were saved, 3-bits per register. The
// registers are numbered from 1 to 6.
uint32_t RegEnc = 0;
- for (int I = 5; I >= 0; --I) {
+ for (int I = 0; I != 6; --I) {
unsigned Reg = SavedRegs[I];
if (Reg == 0) break;
int CURegNum = getCompactUnwindRegNum(CURegs, Reg);
if (CURegNum == -1)
return ~0U;
- RegEnc |= (CURegNum & 0x7) << (5 - I);
+
+ // Encode the 3-bit register number in order, skipping over 3-bits for each
+ // register.
+ RegEnc |= (CURegNum & 0x7) << ((5 - I) * 3);
}
assert((RegEnc & 0x7FFF) == RegEnc && "Invalid compact register encoding!");
@@ -466,14 +479,11 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
unsigned FramePtr = RegInfo->getFrameRegister(MF);
unsigned StackPtr = RegInfo->getStackRegister();
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
-
bool Is64Bit = STI.is64Bit();
bool HasFP = hasFP(MF);
- unsigned SavedRegs[6] = { 0, 0, 0, 0, 0, 0 };
- int SavedRegIdx = 6;
+ unsigned SavedRegs[CU_NUM_SAVED_REGS] = { 0, 0, 0, 0, 0, 0 };
+ unsigned SavedRegIdx = 0;
unsigned OffsetSize = (Is64Bit ? 8 : 4);
@@ -481,14 +491,13 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
unsigned PushInstrSize = 1;
unsigned MoveInstr = (Is64Bit ? X86::MOV64rr : X86::MOV32rr);
unsigned MoveInstrSize = (Is64Bit ? 3 : 2);
- unsigned SubtractInstr = getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta);
unsigned SubtractInstrIdx = (Is64Bit ? 3 : 2);
unsigned StackDivide = (Is64Bit ? 8 : 4);
unsigned InstrOffset = 0;
- unsigned CFAOffset = 0;
unsigned StackAdjust = 0;
+ unsigned StackSize = 0;
MachineBasicBlock &MBB = MF.front(); // Prologue is in entry BB.
bool ExpectEnd = false;
@@ -504,10 +513,10 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
if (Opc == PushInstr) {
// If there are too many saved registers, we cannot use compact encoding.
- if (--SavedRegIdx < 0) return 0;
+ if (SavedRegIdx >= CU_NUM_SAVED_REGS) return 0;
- SavedRegs[SavedRegIdx] = MI.getOperand(0).getReg();
- CFAOffset += OffsetSize;
+ SavedRegs[SavedRegIdx++] = MI.getOperand(0).getReg();
+ StackAdjust += OffsetSize;
InstrOffset += PushInstrSize;
} else if (Opc == MoveInstr) {
unsigned SrcReg = MI.getOperand(1).getReg();
@@ -516,13 +525,14 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
if (DstReg != FramePtr || SrcReg != StackPtr)
return 0;
- CFAOffset = 0;
+ StackAdjust = 0;
memset(SavedRegs, 0, sizeof(SavedRegs));
- SavedRegIdx = 6;
+ SavedRegIdx = 0;
InstrOffset += MoveInstrSize;
- } else if (Opc == SubtractInstr) {
- if (StackAdjust)
- // We all ready have a stack pointer adjustment.
+ } else if (Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) {
+ if (StackSize)
+ // We already have a stack size.
return 0;
if (!MI.getOperand(0).isReg() ||
@@ -533,7 +543,7 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
// %RSP<def> = SUB64ri8 %RSP, 48
return 0;
- StackAdjust = MI.getOperand(2).getImm() / StackDivide;
+ StackSize = MI.getOperand(2).getImm() / StackDivide;
SubtractInstrIdx += InstrOffset;
ExpectEnd = true;
}
@@ -541,28 +551,30 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
// Encode that we are using EBP/RBP as the frame pointer.
uint32_t CompactUnwindEncoding = 0;
- CFAOffset /= StackDivide;
+ StackAdjust /= StackDivide;
if (HasFP) {
- if ((CFAOffset & 0xFF) != CFAOffset)
+ if ((StackAdjust & 0xFF) != StackAdjust)
// Offset was too big for compact encoding.
return 0;
// Get the encoding of the saved registers when we have a frame pointer.
uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(SavedRegs, Is64Bit);
- if (RegEnc == ~0U)
- return 0;
+ if (RegEnc == ~0U) return 0;
CompactUnwindEncoding |= 0x01000000;
- CompactUnwindEncoding |= (CFAOffset & 0xFF) << 16;
+ CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
CompactUnwindEncoding |= RegEnc & 0x7FFF;
} else {
- unsigned FullOffset = CFAOffset + StackAdjust;
- if ((FullOffset & 0xFF) == FullOffset) {
- // Frameless stack.
+ ++StackAdjust;
+ uint32_t TotalStackSize = StackAdjust + StackSize;
+ if ((TotalStackSize & 0xFF) == TotalStackSize) {
+ // Frameless stack with a small stack size.
CompactUnwindEncoding |= 0x02000000;
- CompactUnwindEncoding |= (FullOffset & 0xFF) << 16;
+
+ // Encode the stack size.
+ CompactUnwindEncoding |= (TotalStackSize & 0xFF) << 16;
} else {
- if ((CFAOffset & 0x7) != CFAOffset)
+ if ((StackAdjust & 0x7) != StackAdjust)
// The extra stack adjustments are too big for us to handle.
return 0;
@@ -573,16 +585,21 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
// instruction.
CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
- // Encode any extra stack stack changes (done via push instructions).
- CompactUnwindEncoding |= (CFAOffset & 0x7) << 13;
+ // Encode any extra stack adjustments (done via push instructions).
+ CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
}
+ // Encode the number of registers saved.
+ CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
+
// Get the encoding of the saved registers when we don't have a frame
// pointer.
- uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegs,
- 6 - SavedRegIdx,
- Is64Bit);
+ uint32_t RegEnc =
+ encodeCompactUnwindRegistersWithoutFrame(SavedRegs, SavedRegIdx,
+ Is64Bit);
if (RegEnc == ~0U) return 0;
+
+ // Encode the register encoding.
CompactUnwindEncoding |= RegEnc & 0x3FF;
}
@@ -638,10 +655,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// stack pointer (we fit in the Red Zone).
if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
!RegInfo->needsStackRealignment(MF) &&
- !MFI->hasVarSizedObjects() && // No dynamic alloca.
- !MFI->adjustsStack() && // No calls.
- !IsWin64 && // Win64 has no Red Zone
- !EnableSegmentedStacks) { // Regular stack
+ !MFI->hasVarSizedObjects() && // No dynamic alloca.
+ !MFI->adjustsStack() && // No calls.
+ !IsWin64 && // Win64 has no Red Zone
+ !MF.getTarget().Options.EnableSegmentedStacks) { // Regular stack
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
if (HasFP) MinSize += SlotSize;
StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
@@ -978,7 +995,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
unsigned Opc = PI->getOpcode();
if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
- !PI->getDesc().isTerminator())
+ !PI->isTerminator())
break;
--MBBI;
@@ -1306,6 +1323,10 @@ GetScratchRegister(bool Is64Bit, const MachineFunction &MF) {
}
}
+// The stack limit in the TCB is set to this many bytes above the actual stack
+// limit.
+static const uint64_t kSplitStackAvailable = 256;
+
void
X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
MachineBasicBlock &prologueMBB = MF.front();
@@ -1360,16 +1381,24 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
TlsReg = X86::FS;
TlsOffset = 0x70;
- BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP)
- .addImm(0).addReg(0).addImm(-StackSize).addReg(0);
+ if (StackSize < kSplitStackAvailable)
+ ScratchReg = X86::RSP;
+ else
+ BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP)
+ .addImm(0).addReg(0).addImm(-StackSize).addReg(0);
+
BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg)
.addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
} else {
TlsReg = X86::GS;
TlsOffset = 0x30;
- BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
- .addImm(0).addReg(0).addImm(-StackSize).addReg(0);
+ if (StackSize < kSplitStackAvailable)
+ ScratchReg = X86::ESP;
+ else
+ BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
+ .addImm(0).addReg(0).addImm(-StackSize).addReg(0);
+
BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
.addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
}
@@ -1394,9 +1423,6 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
MF.getRegInfo().setPhysRegUsed(X86::R10);
MF.getRegInfo().setPhysRegUsed(X86::R11);
} else {
- // Since we'll call __morestack, stack alignment needs to be preserved.
- BuildMI(allocMBB, DL, TII.get(X86::SUB32ri), X86::ESP).addReg(X86::ESP)
- .addImm(8);
BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
.addImm(X86FI->getArgumentStackSize());
BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
@@ -1411,11 +1437,6 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
.addExternalSymbol("__morestack");
- // __morestack only seems to remove 8 bytes off the stack. Add back the
- // additional 8 bytes we added before pushing the arguments.
- if (!Is64Bit)
- BuildMI(allocMBB, DL, TII.get(X86::ADD32ri), X86::ESP).addReg(X86::ESP)
- .addImm(8);
if (IsNested)
BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
else
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 96c6f41..03727a2 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -256,7 +256,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->is64Bit()) {
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
- } else if (!UseSoftFloat) {
+ } else if (!TM.Options.UseSoftFloat) {
// We have an algorithm for SSE2->double, and we turn this into a
// 64-bit FILD followed by conditional FADD for other targets.
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
@@ -270,7 +270,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
- if (!UseSoftFloat) {
+ if (!TM.Options.UseSoftFloat) {
// SSE has no i16 to fp conversion, only i32
if (X86ScalarSSEf32) {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
@@ -313,7 +313,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->is64Bit()) {
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
- } else if (!UseSoftFloat) {
+ } else if (!TM.Options.UseSoftFloat) {
// Since AVX is a superset of SSE3, only check for SSE here.
if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
// Expand FP_TO_UINT into a select.
@@ -378,6 +378,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FREM , MVT::f80 , Expand);
setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i8 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i16 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i64 , Expand);
if (Subtarget->hasBMI()) {
setOperationAction(ISD::CTTZ , MVT::i8 , Promote);
} else {
@@ -388,6 +392,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
}
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i8 , Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i16 , Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i64 , Expand);
if (Subtarget->hasLZCNT()) {
setOperationAction(ISD::CTLZ , MVT::i8 , Promote);
} else {
@@ -537,14 +545,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Custom);
- else if (EnableSegmentedStacks)
+ else if (TM.Options.EnableSegmentedStacks)
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Custom);
else
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Expand);
- if (!UseSoftFloat && X86ScalarSSEf64) {
+ if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) {
// f32 and f64 use SSE.
// Set up the FP register classes.
addRegisterClass(MVT::f32, X86::FR32RegisterClass);
@@ -576,7 +584,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// cases we handle.
addLegalFPImmediate(APFloat(+0.0)); // xorpd
addLegalFPImmediate(APFloat(+0.0f)); // xorps
- } else if (!UseSoftFloat && X86ScalarSSEf32) {
+ } else if (!TM.Options.UseSoftFloat && X86ScalarSSEf32) {
// Use SSE for f32, x87 for f64.
// Set up the FP register classes.
addRegisterClass(MVT::f32, X86::FR32RegisterClass);
@@ -605,11 +613,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
- if (!UnsafeFPMath) {
+ if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f64 , Expand);
setOperationAction(ISD::FCOS , MVT::f64 , Expand);
}
- } else if (!UseSoftFloat) {
+ } else if (!TM.Options.UseSoftFloat) {
// f32 and f64 in x87.
// Set up the FP register classes.
addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
@@ -620,7 +628,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
- if (!UnsafeFPMath) {
+ if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f64 , Expand);
setOperationAction(ISD::FCOS , MVT::f64 , Expand);
}
@@ -639,7 +647,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FMA, MVT::f32, Expand);
// Long double always uses X87.
- if (!UseSoftFloat) {
+ if (!TM.Options.UseSoftFloat) {
addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
@@ -658,11 +666,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
}
- if (!UnsafeFPMath) {
+ if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f80 , Expand);
setOperationAction(ISD::FCOS , MVT::f80 , Expand);
}
+ setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f80, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
+ setOperationAction(ISD::FRINT, MVT::f80, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
setOperationAction(ISD::FMA, MVT::f80, Expand);
}
@@ -714,7 +727,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FPOW, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CTPOP, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CTTZ, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CTLZ, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SHL, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SRA, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SRL, (MVT::SimpleValueType)VT, Expand);
@@ -748,7 +763,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
// with -msoft-float, disable use of MMX as well.
- if (!UseSoftFloat && Subtarget->hasMMX()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasMMX()) {
addRegisterClass(MVT::x86mmx, X86::VR64RegisterClass);
// No operations on x86mmx supported, everything uses intrinsics.
}
@@ -785,7 +800,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::BITCAST, MVT::v2i32, Expand);
setOperationAction(ISD::BITCAST, MVT::v1i64, Expand);
- if (!UseSoftFloat && Subtarget->hasXMM()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasXMM()) {
addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
setOperationAction(ISD::FADD, MVT::v4f32, Legal);
@@ -802,7 +817,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
}
- if (!UseSoftFloat && Subtarget->hasXMMInt()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasXMMInt()) {
addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
// FIXME: Unfortunately -soft-float and -no-implicit-float means XMM
@@ -983,7 +998,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->hasSSE42orAVX())
setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
- if (!UseSoftFloat && Subtarget->hasAVX()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasAVX()) {
addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);
addRegisterClass(MVT::v16i16, X86::VR256RegisterClass);
addRegisterClass(MVT::v8i32, X86::VR256RegisterClass);
@@ -1211,10 +1226,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
- setPrefLoopAlignment(16);
+ setPrefLoopAlignment(4); // 2^4 bytes.
benefitFromCodePlacementOpt = true;
- setPrefFunctionAlignment(4);
+ setPrefFunctionAlignment(4); // 2^4 bytes.
}
@@ -1709,7 +1724,8 @@ bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
/// FuncIsMadeTailCallSafe - Return true if the function is being made into
/// a tailcall target by changing its ABI.
-static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) {
+static bool FuncIsMadeTailCallSafe(CallingConv::ID CC,
+ bool GuaranteedTailCallOpt) {
return GuaranteedTailCallOpt && IsTailCallConvention(CC);
}
@@ -1723,7 +1739,8 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
unsigned i) const {
// Create the nodes corresponding to a load from this parameter slot.
ISD::ArgFlagsTy Flags = Ins[i].Flags;
- bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv);
+ bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv,
+ getTargetMachine().Options.GuaranteedTailCallOpt);
bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
EVT ValVT;
@@ -1873,7 +1890,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
unsigned StackSize = CCInfo.getNextStackOffset();
// Align stack specially for tail calls.
- if (FuncIsMadeTailCallSafe(CallConv))
+ if (FuncIsMadeTailCallSafe(CallConv,
+ MF.getTarget().Options.GuaranteedTailCallOpt))
StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
// If the function takes variable number of arguments, make a frame index for
@@ -1918,9 +1936,11 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
assert(!(NumXMMRegs && !Subtarget->hasXMM()) &&
"SSE register cannot be used when SSE is disabled!");
- assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) &&
+ assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat &&
+ NoImplicitFloatOps) &&
"SSE register cannot be used when SSE is disabled!");
- if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasXMM())
+ if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps ||
+ !Subtarget->hasXMM())
// Kernel mode asks for SSE to be disabled, so don't push them
// on the stack.
TotalNumXMMRegs = 0;
@@ -1998,7 +2018,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
}
// Some CCs need callee pop.
- if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt)) {
+ if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
+ MF.getTarget().Options.GuaranteedTailCallOpt)) {
FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
} else {
FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
@@ -2098,7 +2119,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Sibcalls are automatically detected tailcalls which do not require
// ABI changes.
- if (!GuaranteedTailCallOpt && isTailCall)
+ if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
IsSibcall = true;
if (isTailCall)
@@ -2126,7 +2147,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// This is a sibcall. The memory operands are available in caller's
// own caller's stack.
NumBytes = 0;
- else if (GuaranteedTailCallOpt && IsTailCallConvention(CallConv))
+ else if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ IsTailCallConvention(CallConv))
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
int FPDiff = 0;
@@ -2305,7 +2327,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
int FI = 0;
// Do not flag preceding copytoreg stuff together with the following stuff.
InFlag = SDValue();
- if (GuaranteedTailCallOpt) {
+ if (getTargetMachine().Options.GuaranteedTailCallOpt) {
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (VA.isRegLoc())
@@ -2485,7 +2507,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Create the CALLSEQ_END node.
unsigned NumBytesForCalleeToPush;
- if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt))
+ if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
+ getTargetMachine().Options.GuaranteedTailCallOpt))
NumBytesForCalleeToPush = NumBytes; // Callee pops everything
else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet)
// If this is a call to a struct-return function, the callee
@@ -2643,7 +2666,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
- if (GuaranteedTailCallOpt) {
+ if (getTargetMachine().Options.GuaranteedTailCallOpt) {
if (IsTailCallConvention(CalleeCC) && CCMatch)
return true;
return false;
@@ -2843,23 +2866,10 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::MOVDDUP:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
- case X86ISD::UNPCKLPS:
- case X86ISD::UNPCKLPD:
- case X86ISD::PUNPCKLWD:
- case X86ISD::PUNPCKLBW:
- case X86ISD::PUNPCKLDQ:
- case X86ISD::PUNPCKLQDQ:
- case X86ISD::UNPCKHPS:
- case X86ISD::UNPCKHPD:
- case X86ISD::PUNPCKHWD:
- case X86ISD::PUNPCKHBW:
- case X86ISD::PUNPCKHDQ:
- case X86ISD::PUNPCKHQDQ:
- case X86ISD::VPERMILPS:
- case X86ISD::VPERMILPSY:
- case X86ISD::VPERMILPD:
- case X86ISD::VPERMILPDY:
- case X86ISD::VPERM2F128:
+ case X86ISD::UNPCKL:
+ case X86ISD::UNPCKH:
+ case X86ISD::VPERMILP:
+ case X86ISD::VPERM2X128:
return true;
}
return false;
@@ -2885,10 +2895,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
- case X86ISD::VPERMILPS:
- case X86ISD::VPERMILPSY:
- case X86ISD::VPERMILPD:
- case X86ISD::VPERMILPDY:
+ case X86ISD::VPERMILP:
return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
}
@@ -2902,7 +2909,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::PALIGN:
case X86ISD::SHUFPD:
case X86ISD::SHUFPS:
- case X86ISD::VPERM2F128:
+ case X86ISD::VPERM2X128:
return DAG.getNode(Opc, dl, VT, V1, V2,
DAG.getConstant(TargetMask, MVT::i8));
}
@@ -2920,18 +2927,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::MOVLPD:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
- case X86ISD::UNPCKLPS:
- case X86ISD::UNPCKLPD:
- case X86ISD::PUNPCKLWD:
- case X86ISD::PUNPCKLBW:
- case X86ISD::PUNPCKLDQ:
- case X86ISD::PUNPCKLQDQ:
- case X86ISD::UNPCKHPS:
- case X86ISD::UNPCKHPD:
- case X86ISD::PUNPCKHWD:
- case X86ISD::PUNPCKHBW:
- case X86ISD::PUNPCKHDQ:
- case X86ISD::PUNPCKHQDQ:
+ case X86ISD::UNPCKL:
+ case X86ISD::UNPCKH:
return DAG.getNode(Opc, dl, VT, V1, V2);
}
return SDValue();
@@ -3231,7 +3228,7 @@ bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) {
static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
bool hasSSSE3OrAVX) {
int i, e = VT.getVectorNumElements();
- if (VT.getSizeInBits() != 128 && VT.getSizeInBits() != 64)
+ if (VT.getSizeInBits() != 128)
return false;
// Do not handle v2i64 / v2f64 shuffles with palignr.
@@ -3261,17 +3258,17 @@ static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
return true;
}
-/// isVSHUFPSYMask - Return true if the specified VECTOR_SHUFFLE operand
+/// isVSHUFPYMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to 256-bit
/// VSHUFPSY and VSHUFPDY.
-static bool isVSHUFPSYMask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
+static bool isVSHUFPYMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool HasAVX, bool Commuted = false) {
int NumElems = VT.getVectorNumElements();
- if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
+ if (!HasAVX || VT.getSizeInBits() != 256)
return false;
- if (NumElems != 8)
+ if (NumElems != 4 && NumElems != 8)
return false;
// VSHUFPSY divides the resulting vector into 4 chunks.
@@ -3284,124 +3281,63 @@ static bool isVSHUFPSYMask(const SmallVectorImpl<int> &Mask, EVT VT,
// DST => Y7..Y4, Y7..Y4, X7..X4, X7..X4,
// Y3..Y0, Y3..Y0, X3..X0, X3..X0
//
- int QuarterSize = NumElems/4;
- int HalfSize = QuarterSize*2;
- for (int i = 0; i < QuarterSize; ++i)
- if (!isUndefOrInRange(Mask[i], 0, HalfSize))
- return false;
- for (int i = QuarterSize; i < QuarterSize*2; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize))
- return false;
-
- // The mask of the second half must be the same as the first but with
- // the appropriate offsets. This works in the same way as VPERMILPS
- // works with masks.
- for (int i = QuarterSize*2; i < QuarterSize*3; ++i) {
- if (!isUndefOrInRange(Mask[i], HalfSize, NumElems))
- return false;
- int FstHalfIdx = i-HalfSize;
- if (Mask[FstHalfIdx] < 0)
- continue;
- if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize))
- return false;
- }
- for (int i = QuarterSize*3; i < NumElems; ++i) {
- if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2))
- return false;
- int FstHalfIdx = i-HalfSize;
- if (Mask[FstHalfIdx] < 0)
- continue;
- if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize))
- return false;
-
- }
-
- return true;
-}
-
-/// getShuffleVSHUFPSYImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VSHUFPSY instruction.
-static unsigned getShuffleVSHUFPSYImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- EVT VT = SVOp->getValueType(0);
- int NumElems = VT.getVectorNumElements();
-
- assert(NumElems == 8 && VT.getSizeInBits() == 256 &&
- "Only supports v8i32 and v8f32 types");
-
- int HalfSize = NumElems/2;
- unsigned Mask = 0;
- for (int i = 0; i != NumElems ; ++i) {
- if (SVOp->getMaskElt(i) < 0)
- continue;
- // The mask of the first half must be equal to the second one.
- unsigned Shamt = (i%HalfSize)*2;
- unsigned Elt = SVOp->getMaskElt(i) % HalfSize;
- Mask |= Elt << Shamt;
- }
-
- return Mask;
-}
-
-/// isVSHUFPDYMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to 256-bit
-/// VSHUFPDY. This shuffle doesn't have the same restriction as the PS
-/// version and the mask of the second half isn't binded with the first
-/// one.
-static bool isVSHUFPDYMask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
- int NumElems = VT.getVectorNumElements();
-
- if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
- return false;
-
- if (NumElems != 4)
- return false;
-
- // VSHUFPSY divides the resulting vector into 4 chunks.
+ // VSHUFPDY divides the resulting vector into 4 chunks.
// The sources are also split into 4 chunks, and each destination
// chunk must come from a different source chunk.
//
// SRC1 => X3 X2 X1 X0
// SRC2 => Y3 Y2 Y1 Y0
//
- // DST => Y2..Y3, X2..X3, Y1..Y0, X1..X0
+ // DST => Y3..Y2, X3..X2, Y1..Y0, X1..X0
//
- int QuarterSize = NumElems/4;
- int HalfSize = QuarterSize*2;
- for (int i = 0; i < QuarterSize; ++i)
- if (!isUndefOrInRange(Mask[i], 0, HalfSize))
- return false;
- for (int i = QuarterSize; i < QuarterSize*2; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize))
- return false;
- for (int i = QuarterSize*2; i < QuarterSize*3; ++i)
- if (!isUndefOrInRange(Mask[i], HalfSize, NumElems))
- return false;
- for (int i = QuarterSize*3; i < NumElems; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2))
- return false;
+ unsigned QuarterSize = NumElems/4;
+ unsigned HalfSize = QuarterSize*2;
+ for (unsigned l = 0; l != 2; ++l) {
+ unsigned LaneStart = l*HalfSize;
+ for (unsigned s = 0; s != 2; ++s) {
+ unsigned QuarterStart = s*QuarterSize;
+ unsigned Src = (Commuted) ? (1-s) : s;
+ unsigned SrcStart = Src*NumElems + LaneStart;
+ for (unsigned i = 0; i != QuarterSize; ++i) {
+ int Idx = Mask[i+QuarterStart+LaneStart];
+ if (!isUndefOrInRange(Idx, SrcStart, SrcStart+HalfSize))
+ return false;
+ // For VSHUFPSY, the mask of the second half must be the same as the first
+ // but with the appropriate offsets. This works in the same way as
+ // VPERMILPS works with masks.
+ if (NumElems == 4 || l == 0 || Mask[i+QuarterStart] < 0)
+ continue;
+ if (!isUndefOrEqual(Idx, Mask[i+QuarterStart]+HalfSize))
+ return false;
+ }
+ }
+ }
return true;
}
-/// getShuffleVSHUFPDYImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VSHUFPDY instruction.
-static unsigned getShuffleVSHUFPDYImmediate(SDNode *N) {
+/// getShuffleVSHUFPYImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VSHUFPSY/VSHUFPDY instructions.
+static unsigned getShuffleVSHUFPYImmediate(SDNode *N) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
EVT VT = SVOp->getValueType(0);
int NumElems = VT.getVectorNumElements();
- assert(NumElems == 4 && VT.getSizeInBits() == 256 &&
- "Only supports v4i64 and v4f64 types");
+ assert(VT.getSizeInBits() == 256 && "Only supports 256-bit types");
+ assert((NumElems == 4 || NumElems == 8) && "Only supports v4 and v8 types");
int HalfSize = NumElems/2;
+ unsigned Mul = (NumElems == 8) ? 2 : 1;
unsigned Mask = 0;
- for (int i = 0; i != NumElems ; ++i) {
- if (SVOp->getMaskElt(i) < 0)
+ for (int i = 0; i != NumElems; ++i) {
+ int Elt = SVOp->getMaskElt(i);
+ if (Elt < 0)
continue;
- int Elt = SVOp->getMaskElt(i) % HalfSize;
- Mask |= Elt << i;
+ Elt %= HalfSize;
+ unsigned Shamt = i;
+ // For VSHUFPSY, the mask of the first half must be equal to the second one.
+ if (NumElems == 8) Shamt %= HalfSize;
+ Mask |= Elt << (Shamt*Mul);
}
return Mask;
@@ -3409,8 +3345,8 @@ static unsigned getShuffleVSHUFPDYImmediate(SDNode *N) {
/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
/// the two vector operands have swapped position.
-static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) {
- unsigned NumElems = VT.getVectorNumElements();
+static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask,
+ unsigned NumElems) {
for (unsigned i = 0; i != NumElems; ++i) {
int idx = Mask[i];
if (idx < 0)
@@ -3422,31 +3358,13 @@ static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) {
}
}
-/// isCommutedVSHUFP() - Return true if swapping operands will
-/// allow to use the "vshufpd" or "vshufps" instruction
-/// for 256-bit vectors
-static bool isCommutedVSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
-
- unsigned NumElems = VT.getVectorNumElements();
- if ((VT.getSizeInBits() != 256) || ((NumElems != 4) && (NumElems != 8)))
- return false;
-
- SmallVector<int, 8> CommutedMask;
- for (unsigned i = 0; i < NumElems; ++i)
- CommutedMask.push_back(Mask[i]);
-
- CommuteVectorShuffleMask(CommutedMask, VT);
- return (NumElems == 4) ? isVSHUFPDYMask(CommutedMask, VT, Subtarget):
- isVSHUFPSYMask(CommutedMask, VT, Subtarget);
-}
-
-
/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to 128-bit
-/// SHUFPS and SHUFPD.
-static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
- int NumElems = VT.getVectorNumElements();
+/// SHUFPS and SHUFPD. If Commuted is true, then it checks for sources to be
+/// reverse of what x86 shuffles want.
+static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool Commuted = false) {
+ unsigned NumElems = VT.getVectorNumElements();
if (VT.getSizeInBits() != 128)
return false;
@@ -3454,12 +3372,14 @@ static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
if (NumElems != 2 && NumElems != 4)
return false;
- int Half = NumElems / 2;
- for (int i = 0; i < Half; ++i)
- if (!isUndefOrInRange(Mask[i], 0, NumElems))
+ unsigned Half = NumElems / 2;
+ unsigned SrcStart = Commuted ? NumElems : 0;
+ for (unsigned i = 0; i != Half; ++i)
+ if (!isUndefOrInRange(Mask[i], SrcStart, SrcStart+NumElems))
return false;
- for (int i = Half; i < NumElems; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
+ SrcStart = Commuted ? 0 : NumElems;
+ for (unsigned i = Half; i != NumElems; ++i)
+ if (!isUndefOrInRange(Mask[i], SrcStart, SrcStart+NumElems))
return false;
return true;
@@ -3471,32 +3391,6 @@ bool X86::isSHUFPMask(ShuffleVectorSDNode *N) {
return ::isSHUFPMask(M, N->getValueType(0));
}
-/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
-/// the reverse of what x86 shuffles want. x86 shuffles requires the lower
-/// half elements to come from vector 1 (which would equal the dest.) and
-/// the upper half to come from vector 2.
-static bool isCommutedSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
- int NumElems = VT.getVectorNumElements();
-
- if (NumElems != 2 && NumElems != 4)
- return false;
-
- int Half = NumElems / 2;
- for (int i = 0; i < Half; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
- return false;
- for (int i = Half; i < NumElems; ++i)
- if (!isUndefOrInRange(Mask[i], 0, NumElems))
- return false;
- return true;
-}
-
-static bool isCommutedSHUFP(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return isCommutedSHUFPMask(M, N->getValueType(0));
-}
-
/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
@@ -3765,15 +3659,15 @@ bool X86::isMOVLMask(ShuffleVectorSDNode *N) {
return ::isMOVLMask(M, N->getValueType(0));
}
-/// isVPERM2F128Mask - Match 256-bit shuffles where the elements are considered
+/// isVPERM2X128Mask - Match 256-bit shuffles where the elements are considered
/// as permutations between 128-bit chunks or halves. As an example: this
/// shuffle below:
/// vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>
/// The first half comes from the second half of V1 and the second half from the
/// second half of V2.
-static bool isVPERM2F128Mask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
- if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
+static bool isVPERM2X128Mask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool HasAVX) {
+ if (!HasAVX || VT.getSizeInBits() != 256)
return false;
// The shuffle result is divided into half A and half B. In total the two
@@ -3801,10 +3695,9 @@ static bool isVPERM2F128Mask(const SmallVectorImpl<int> &Mask, EVT VT,
return MatchA && MatchB;
}
-/// getShuffleVPERM2F128Immediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VPERM2F128 instructions.
-static unsigned getShuffleVPERM2F128Immediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+/// getShuffleVPERM2X128Immediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VPERM2F128/VPERM2I128 instructions.
+static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
EVT VT = SVOp->getValueType(0);
int HalfSize = VT.getVectorNumElements()/2;
@@ -3826,81 +3719,47 @@ static unsigned getShuffleVPERM2F128Immediate(SDNode *N) {
return (FstHalf | (SndHalf << 4));
}
-/// isVPERMILPDMask - Return true if the specified VECTOR_SHUFFLE operand
+/// isVPERMILPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to VPERMILPS/D*.
/// Note that VPERMIL mask matching differs depending on whether the underlying
/// type is 32 or 64. In the VPERMILPS the high half of the mask should point
/// to the same elements of the low, but to the higher half of the source.
/// In VPERMILPD the two lanes could be shuffled independently of each other
/// with the same restriction that lanes can't be crossed.
-static bool isVPERMILPDMask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
+static bool isVPERMILPMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool HasAVX) {
int NumElts = VT.getVectorNumElements();
int NumLanes = VT.getSizeInBits()/128;
- if (!Subtarget->hasAVX())
+ if (!HasAVX)
return false;
- // Only match 256-bit with 64-bit types
- if (VT.getSizeInBits() != 256 || NumElts != 4)
+ // Only match 256-bit with 32/64-bit types
+ if (VT.getSizeInBits() != 256 || (NumElts != 4 && NumElts != 8))
return false;
- // The mask on the high lane is independent of the low. Both can match
- // any element in inside its own lane, but can't cross.
int LaneSize = NumElts/NumLanes;
- for (int l = 0; l < NumLanes; ++l)
- for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) {
- int LaneStart = l*LaneSize;
- if (!isUndefOrInRange(Mask[i], LaneStart, LaneStart+LaneSize))
+ for (int l = 0; l != NumLanes; ++l) {
+ int LaneStart = l*LaneSize;
+ for (int i = 0; i != LaneSize; ++i) {
+ if (!isUndefOrInRange(Mask[i+LaneStart], LaneStart, LaneStart+LaneSize))
+ return false;
+ if (NumElts == 4 || l == 0)
+ continue;
+ // VPERMILPS handling
+ if (Mask[i] < 0)
+ continue;
+ if (!isUndefOrEqual(Mask[i+LaneStart], Mask[i]+LaneSize))
return false;
}
-
- return true;
-}
-
-/// isVPERMILPSMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to VPERMILPS*.
-/// Note that VPERMIL mask matching is different depending whether theunderlying
-/// type is 32 or 64. In the VPERMILPS the high half of the mask should point
-/// to the same elements of the low, but to the higher half of the source.
-/// In VPERMILPD the two lanes could be shuffled independently of each other
-/// with the same restriction that lanes can't be crossed.
-static bool isVPERMILPSMask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
- unsigned NumElts = VT.getVectorNumElements();
- unsigned NumLanes = VT.getSizeInBits()/128;
-
- if (!Subtarget->hasAVX())
- return false;
-
- // Only match 256-bit with 32-bit types
- if (VT.getSizeInBits() != 256 || NumElts != 8)
- return false;
-
- // The mask on the high lane should be the same as the low. Actually,
- // they can differ if any of the corresponding index in a lane is undef
- // and the other stays in range.
- int LaneSize = NumElts/NumLanes;
- for (int i = 0; i < LaneSize; ++i) {
- int HighElt = i+LaneSize;
- bool HighValid = isUndefOrInRange(Mask[HighElt], LaneSize, NumElts);
- bool LowValid = isUndefOrInRange(Mask[i], 0, LaneSize);
-
- if (!HighValid || !LowValid)
- return false;
- if (Mask[i] < 0 || Mask[HighElt] < 0)
- continue;
- if (Mask[HighElt]-Mask[i] != LaneSize)
- return false;
}
return true;
}
-/// getShuffleVPERMILPSImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VPERMILPS* instructions.
-static unsigned getShuffleVPERMILPSImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+/// getShuffleVPERMILPImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VPERMILPS/D* instructions.
+static unsigned getShuffleVPERMILPImmediate(ShuffleVectorSDNode *SVOp) {
EVT VT = SVOp->getValueType(0);
int NumElts = VT.getVectorNumElements();
@@ -3911,43 +3770,22 @@ static unsigned getShuffleVPERMILPSImmediate(SDNode *N) {
// where a mask will match because the same mask element is undef on the
// first half but valid on the second. This would get pathological cases
// such as: shuffle <u, 0, 1, 2, 4, 4, 5, 6>, which is completely valid.
+ unsigned Shift = (LaneSize == 4) ? 2 : 1;
unsigned Mask = 0;
- for (int l = 0; l < NumLanes; ++l) {
- for (int i = 0; i < LaneSize; ++i) {
- int MaskElt = SVOp->getMaskElt(i+(l*LaneSize));
- if (MaskElt < 0)
- continue;
- if (MaskElt >= LaneSize)
- MaskElt -= LaneSize;
- Mask |= MaskElt << (i*2);
- }
+ for (int i = 0; i != NumElts; ++i) {
+ int MaskElt = SVOp->getMaskElt(i);
+ if (MaskElt < 0)
+ continue;
+ MaskElt %= LaneSize;
+ unsigned Shamt = i;
+ // VPERMILPSY, the mask of the first half must be equal to the second one
+ if (NumElts == 8) Shamt %= LaneSize;
+ Mask |= MaskElt << (Shamt*Shift);
}
return Mask;
}
-/// getShuffleVPERMILPDImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VPERMILPD* instructions.
-static unsigned getShuffleVPERMILPDImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- EVT VT = SVOp->getValueType(0);
-
- int NumElts = VT.getVectorNumElements();
- int NumLanes = VT.getSizeInBits()/128;
-
- unsigned Mask = 0;
- int LaneSize = NumElts/NumLanes;
- for (int l = 0; l < NumLanes; ++l)
- for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) {
- int MaskElt = SVOp->getMaskElt(i);
- if (MaskElt < 0)
- continue;
- Mask |= (MaskElt-l*LaneSize) << i;
- }
-
- return Mask;
-}
-
/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
/// of what x86 movss want. X86 movs requires the lowest element to be lowest
/// element of vector 2 and the other elements to come from vector 1 in order.
@@ -4035,21 +3873,18 @@ bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N,
/// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to 256-bit
/// version of MOVDDUP.
-static bool isMOVDDUPYMask(ShuffleVectorSDNode *N,
- const X86Subtarget *Subtarget) {
- EVT VT = N->getValueType(0);
+static bool isMOVDDUPYMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool HasAVX) {
int NumElts = VT.getVectorNumElements();
- bool V2IsUndef = N->getOperand(1).getOpcode() == ISD::UNDEF;
- if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256 ||
- !V2IsUndef || NumElts != 4)
+ if (!HasAVX || VT.getSizeInBits() != 256 || NumElts != 4)
return false;
for (int i = 0; i != NumElts/2; ++i)
- if (!isUndefOrEqual(N->getMaskElt(i), 0))
+ if (!isUndefOrEqual(Mask[i], 0))
return false;
for (int i = NumElts/2; i != NumElts; ++i)
- if (!isUndefOrEqual(N->getMaskElt(i), NumElts/2))
+ if (!isUndefOrEqual(Mask[i], NumElts/2))
return false;
return true;
}
@@ -4164,14 +3999,13 @@ unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
/// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
-unsigned X86::getShufflePALIGNRImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- EVT VVT = N->getValueType(0);
- unsigned EltSize = VVT.getVectorElementType().getSizeInBits() >> 3;
+static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
+ EVT VT = SVOp->getValueType(0);
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits() >> 3;
int Val = 0;
unsigned i, e;
- for (i = 0, e = VVT.getVectorNumElements(); i != e; ++i) {
+ for (i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
Val = SVOp->getMaskElt(i);
if (Val >= 0)
break;
@@ -4631,29 +4465,14 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
case X86ISD::SHUFPS:
case X86ISD::SHUFPD:
ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeSHUFPSMask(NumElems,
- cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::PUNPCKHBW:
- case X86ISD::PUNPCKHWD:
- case X86ISD::PUNPCKHDQ:
- case X86ISD::PUNPCKHQDQ:
- DecodePUNPCKHMask(NumElems, ShuffleMask);
- break;
- case X86ISD::UNPCKHPS:
- case X86ISD::UNPCKHPD:
- DecodeUNPCKHPMask(VT, ShuffleMask);
+ DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
break;
- case X86ISD::PUNPCKLBW:
- case X86ISD::PUNPCKLWD:
- case X86ISD::PUNPCKLDQ:
- case X86ISD::PUNPCKLQDQ:
- DecodePUNPCKLMask(VT, ShuffleMask);
+ case X86ISD::UNPCKH:
+ DecodeUNPCKHMask(VT, ShuffleMask);
break;
- case X86ISD::UNPCKLPS:
- case X86ISD::UNPCKLPD:
- DecodeUNPCKLPMask(VT, ShuffleMask);
+ case X86ISD::UNPCKL:
+ DecodeUNPCKLMask(VT, ShuffleMask);
break;
case X86ISD::MOVHLPS:
DecodeMOVHLPSMask(NumElems, ShuffleMask);
@@ -4686,27 +4505,12 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG,
Depth+1);
}
- case X86ISD::VPERMILPS:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeVPERMILPSMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::VPERMILPSY:
+ case X86ISD::VPERMILP:
ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeVPERMILPSMask(8, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ DecodeVPERMILPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
ShuffleMask);
break;
- case X86ISD::VPERMILPD:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeVPERMILPDMask(2, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::VPERMILPDY:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeVPERMILPDMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::VPERM2F128:
+ case X86ISD::VPERM2X128:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeVPERM2F128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
ShuffleMask);
@@ -5334,8 +5138,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DAG);
} else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
- assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
- EVT MiddleVT = MVT::v4i32;
+ unsigned NumBits = VT.getSizeInBits();
+ assert((NumBits == 128 || NumBits == 256) &&
+ "Expected an SSE or AVX value type!");
+ EVT MiddleVT = NumBits == 128 ? MVT::v4i32 : MVT::v8i32;
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true,
Subtarget->hasXMMInt(), DAG);
@@ -6256,7 +6062,7 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
// from X.
if (NumHi == 3) {
// Normalize it so the 3 elements come from V1.
- CommuteVectorShuffleMask(PermMask, VT);
+ CommuteVectorShuffleMask(PermMask, 4);
std::swap(V1, V2);
}
@@ -6566,70 +6372,6 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) {
X86::getShuffleSHUFImmediate(SVOp), DAG);
}
-static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) {
- switch(VT.getSimpleVT().SimpleTy) {
- case MVT::v4i32: return X86ISD::PUNPCKLDQ;
- case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
- case MVT::v8i32:
- if (HasAVX2) return X86ISD::PUNPCKLDQ;
- // else use fp unit for int unpack.
- case MVT::v8f32:
- case MVT::v4f32: return X86ISD::UNPCKLPS;
- case MVT::v4i64:
- if (HasAVX2) return X86ISD::PUNPCKLQDQ;
- // else use fp unit for int unpack.
- case MVT::v4f64:
- case MVT::v2f64: return X86ISD::UNPCKLPD;
- case MVT::v32i8:
- case MVT::v16i8: return X86ISD::PUNPCKLBW;
- case MVT::v16i16:
- case MVT::v8i16: return X86ISD::PUNPCKLWD;
- default:
- llvm_unreachable("Unknown type for unpckl");
- }
- return 0;
-}
-
-static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) {
- switch(VT.getSimpleVT().SimpleTy) {
- case MVT::v4i32: return X86ISD::PUNPCKHDQ;
- case MVT::v2i64: return X86ISD::PUNPCKHQDQ;
- case MVT::v8i32:
- if (HasAVX2) return X86ISD::PUNPCKHDQ;
- // else use fp unit for int unpack.
- case MVT::v8f32:
- case MVT::v4f32: return X86ISD::UNPCKHPS;
- case MVT::v4i64:
- if (HasAVX2) return X86ISD::PUNPCKHQDQ;
- // else use fp unit for int unpack.
- case MVT::v4f64:
- case MVT::v2f64: return X86ISD::UNPCKHPD;
- case MVT::v32i8:
- case MVT::v16i8: return X86ISD::PUNPCKHBW;
- case MVT::v16i16:
- case MVT::v8i16: return X86ISD::PUNPCKHWD;
- default:
- llvm_unreachable("Unknown type for unpckh");
- }
- return 0;
-}
-
-static inline unsigned getVPERMILOpcode(EVT VT) {
- switch(VT.getSimpleVT().SimpleTy) {
- case MVT::v4i32:
- case MVT::v4f32: return X86ISD::VPERMILPS;
- case MVT::v2i64:
- case MVT::v2f64: return X86ISD::VPERMILPD;
- case MVT::v8i32:
- case MVT::v8f32: return X86ISD::VPERMILPSY;
- case MVT::v4i64:
- case MVT::v4f64: return X86ISD::VPERMILPDY;
- default:
- llvm_unreachable("Unknown type for vpermil");
- }
- return 0;
-}
-
static
SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI,
@@ -6703,17 +6445,19 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
unsigned NumElems = VT.getVectorNumElements();
- bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
bool V1IsSplat = false;
bool V2IsSplat = false;
bool HasXMMInt = Subtarget->hasXMMInt();
+ bool HasAVX = Subtarget->hasAVX();
bool HasAVX2 = Subtarget->hasAVX2();
MachineFunction &MF = DAG.getMachineFunction();
bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");
+ assert(V1.getOpcode() != ISD::UNDEF && "Op 1 of shuffle should not be undef");
+
// Vector shuffle lowering takes 3 steps:
//
// 1) Normalize the input vectors. Here splats, zeroed vectors, profitable
@@ -6738,11 +6482,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
// unpckh_undef). Only use pshufd if speed is more important than size.
if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1,
- DAG);
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
- return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1,
- DAG);
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
if (X86::isMOVDDUPMask(SVOp) && Subtarget->hasSSE3orAVX() &&
V2IsUndef && RelaxedMayFoldVectorLoad(V1))
@@ -6754,8 +6496,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// Use to match splats
if (HasXMMInt && X86::isUNPCKHMask(SVOp, HasAVX2) && V2IsUndef &&
(VT == MVT::v2f64 || VT == MVT::v2i64))
- return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1,
- DAG);
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
if (X86::isPSHUFDMask(SVOp)) {
// The actual implementation will match the mask in the if above and then
@@ -6787,8 +6528,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
if (X86::isMOVLMask(SVOp)) {
- if (V1IsUndef)
- return V2;
if (ISD::isBuildVectorAllZeros(V1.getNode()))
return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl);
if (!X86::isMOVLPMask(SVOp)) {
@@ -6834,17 +6573,19 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
V2IsSplat = isSplatVector(V2.getNode());
// Canonicalize the splat or undef, if present, to be on the RHS.
- if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
+ if (V1IsSplat && !V2IsSplat) {
Op = CommuteVectorShuffle(SVOp, DAG);
SVOp = cast<ShuffleVectorSDNode>(Op);
V1 = SVOp->getOperand(0);
V2 = SVOp->getOperand(1);
std::swap(V1IsSplat, V2IsSplat);
- std::swap(V1IsUndef, V2IsUndef);
Commuted = true;
}
- if (isCommutedMOVL(SVOp, V2IsSplat, V2IsUndef)) {
+ SmallVector<int, 32> M;
+ SVOp->getMask(M);
+
+ if (isCommutedMOVLMask(M, VT, V2IsSplat, V2IsUndef)) {
// Shuffling low element of v1 into undef, just return v1.
if (V2IsUndef)
return V1;
@@ -6854,13 +6595,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getMOVL(DAG, dl, VT, V2, V1);
}
- if (X86::isUNPCKLMask(SVOp, HasAVX2))
- return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V2,
- DAG);
+ if (isUNPCKLMask(M, VT, HasAVX2))
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
- if (X86::isUNPCKHMask(SVOp, HasAVX2))
- return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V2,
- DAG);
+ if (isUNPCKHMask(M, VT, HasAVX2))
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
if (V2IsSplat) {
// Normalize mask so all entries that point to V2 points to its first
@@ -6884,35 +6623,30 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
if (X86::isUNPCKLMask(NewSVOp, HasAVX2))
- return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V2, V1,
- DAG);
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V2, V1, DAG);
if (X86::isUNPCKHMask(NewSVOp, HasAVX2))
- return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V2, V1,
- DAG);
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V2, V1, DAG);
}
// Normalize the node to match x86 shuffle ops if needed
- if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp))
+ if (!V2IsUndef && (isSHUFPMask(M, VT, /* Commuted */ true) ||
+ isVSHUFPYMask(M, VT, HasAVX, /* Commuted */ true)))
return CommuteVectorShuffle(SVOp, DAG);
// The checks below are all present in isShuffleMaskLegal, but they are
// inlined here right now to enable us to directly emit target specific
// nodes, and remove one by one until they don't return Op anymore.
- SmallVector<int, 16> M;
- SVOp->getMask(M);
if (isPALIGNRMask(M, VT, Subtarget->hasSSSE3orAVX()))
return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2,
- X86::getShufflePALIGNRImmediate(SVOp),
+ getShufflePALIGNRImmediate(SVOp),
DAG);
if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
SVOp->getSplatIndex() == 0 && V2IsUndef) {
- if (VT == MVT::v2f64)
- return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG);
- if (VT == MVT::v2i64)
- return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG);
+ if (VT == MVT::v2f64 || VT == MVT::v2i64)
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
}
if (isPSHUFHWMask(M, VT))
@@ -6929,12 +6663,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
X86::getShuffleSHUFImmediate(SVOp), DAG);
- if (X86::isUNPCKL_v_undef_Mask(SVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1,
- DAG);
- if (X86::isUNPCKH_v_undef_Mask(SVOp))
- return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1,
- DAG);
+ if (isUNPCKL_v_undef_Mask(M, VT))
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
+ if (isUNPCKH_v_undef_Mask(M, VT))
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
//===--------------------------------------------------------------------===//
// Generate target specific nodes for 128 or 256-bit shuffles only
@@ -6942,44 +6674,23 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
//
// Handle VMOVDDUPY permutations
- if (isMOVDDUPYMask(SVOp, Subtarget))
+ if (V2IsUndef && isMOVDDUPYMask(M, VT, HasAVX))
return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
- // Handle VPERMILPS* permutations
- if (isVPERMILPSMask(M, VT, Subtarget))
- return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1,
- getShuffleVPERMILPSImmediate(SVOp), DAG);
-
- // Handle VPERMILPD* permutations
- if (isVPERMILPDMask(M, VT, Subtarget))
- return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1,
- getShuffleVPERMILPDImmediate(SVOp), DAG);
+ // Handle VPERMILPS/D* permutations
+ if (isVPERMILPMask(M, VT, HasAVX))
+ return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1,
+ getShuffleVPERMILPImmediate(SVOp), DAG);
- // Handle VPERM2F128 permutations
- if (isVPERM2F128Mask(M, VT, Subtarget))
- return getTargetShuffleNode(X86ISD::VPERM2F128, dl, VT, V1, V2,
- getShuffleVPERM2F128Immediate(SVOp), DAG);
+ // Handle VPERM2F128/VPERM2I128 permutations
+ if (isVPERM2X128Mask(M, VT, HasAVX))
+ return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
+ V2, getShuffleVPERM2X128Immediate(SVOp), DAG);
- // Handle VSHUFPSY permutations
- if (isVSHUFPSYMask(M, VT, Subtarget))
+ // Handle VSHUFPS/DY permutations
+ if (isVSHUFPYMask(M, VT, HasAVX))
return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
- getShuffleVSHUFPSYImmediate(SVOp), DAG);
-
- // Handle VSHUFPDY permutations
- if (isVSHUFPDYMask(M, VT, Subtarget))
- return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
- getShuffleVSHUFPDYImmediate(SVOp), DAG);
-
- // Try to swap operands in the node to match x86 shuffle ops
- if (isCommutedVSHUFPMask(M, VT, Subtarget)) {
- // Now we need to commute operands.
- SVOp = cast<ShuffleVectorSDNode>(CommuteVectorShuffle(SVOp, DAG));
- V1 = SVOp->getOperand(0);
- V2 = SVOp->getOperand(1);
- unsigned Immediate = (NumElems == 4) ? getShuffleVSHUFPDYImmediate(SVOp):
- getShuffleVSHUFPSYImmediate(SVOp);
- return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2, Immediate, DAG);
- }
+ getShuffleVSHUFPYImmediate(SVOp), DAG);
//===--------------------------------------------------------------------===//
// Since no target specific shuffle was selected for this generic one,
@@ -7888,7 +7599,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
LLVMContext *Context = DAG.getContext();
// Build some magic constants.
- std::vector<Constant*> CV0;
+ SmallVector<Constant*,4> CV0;
CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x45300000)));
CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x43300000)));
CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
@@ -7896,7 +7607,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
Constant *C0 = ConstantVector::get(CV0);
SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16);
- std::vector<Constant*> CV1;
+ SmallVector<Constant*,2> CV1;
CV1.push_back(
ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL))));
CV1.push_back(
@@ -8176,17 +7887,13 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op,
EVT EltVT = VT;
if (VT.isVector())
EltVT = VT.getVectorElementType();
- std::vector<Constant*> CV;
+ SmallVector<Constant*,4> CV;
if (EltVT == MVT::f64) {
Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))));
- CV.push_back(C);
- CV.push_back(C);
+ CV.assign(2, C);
} else {
Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))));
- CV.push_back(C);
- CV.push_back(C);
- CV.push_back(C);
- CV.push_back(C);
+ CV.assign(4, C);
}
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
@@ -8201,19 +7908,18 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
EVT EltVT = VT;
- if (VT.isVector())
+ unsigned NumElts = VT == MVT::f64 ? 2 : 4;
+ if (VT.isVector()) {
EltVT = VT.getVectorElementType();
- std::vector<Constant*> CV;
+ NumElts = VT.getVectorNumElements();
+ }
+ SmallVector<Constant*,8> CV;
if (EltVT == MVT::f64) {
Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63)));
- CV.push_back(C);
- CV.push_back(C);
+ CV.assign(NumElts, C);
} else {
Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31)));
- CV.push_back(C);
- CV.push_back(C);
- CV.push_back(C);
- CV.push_back(C);
+ CV.assign(NumElts, C);
}
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
@@ -8221,11 +7927,12 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo::getConstantPool(),
false, false, false, 16);
if (VT.isVector()) {
+ MVT XORVT = VT.getSizeInBits() == 128 ? MVT::v2i64 : MVT::v4i64;
return DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getNode(ISD::XOR, dl, MVT::v2i64,
- DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
+ DAG.getNode(ISD::XOR, dl, XORVT,
+ DAG.getNode(ISD::BITCAST, dl, XORVT,
Op.getOperand(0)),
- DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Mask)));
+ DAG.getNode(ISD::BITCAST, dl, XORVT, Mask)));
} else {
return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask);
}
@@ -8254,7 +7961,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
// type, and that won't be f80 since that is not custom lowered.
// First get the sign bit of second operand.
- std::vector<Constant*> CV;
+ SmallVector<Constant*,4> CV;
if (SrcVT == MVT::f64) {
CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))));
CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
@@ -9253,7 +8960,7 @@ SDValue
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows() ||
- EnableSegmentedStacks) &&
+ getTargetMachine().Options.EnableSegmentedStacks) &&
"This should be used only on Windows targets or when segmented stacks "
"are being used");
assert(!Subtarget->isTargetEnvMacho() && "Not implemented");
@@ -9267,7 +8974,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
bool Is64Bit = Subtarget->is64Bit();
EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32;
- if (EnableSegmentedStacks) {
+ if (getTargetMachine().Options.EnableSegmentedStacks) {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -9403,7 +9110,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
if (ArgMode == 2) {
// Sanity Check: Make sure using fp_offset makes sense.
- assert(!UseSoftFloat &&
+ assert(!getTargetMachine().Options.UseSoftFloat &&
!(DAG.getMachineFunction()
.getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) &&
Subtarget->hasXMM());
@@ -10472,7 +10179,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
DAG.getConstant(4, MVT::i32));
- R = DAG.getNode(ISD::VSELECT, dl, VT, Op, R, M);
+ R = DAG.getNode(ISD::VSELECT, dl, VT, Op, M, R);
// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
@@ -10487,13 +10194,13 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
DAG.getConstant(2, MVT::i32));
- R = DAG.getNode(ISD::VSELECT, dl, VT, Op, R, M);
+ R = DAG.getNode(ISD::VSELECT, dl, VT, Op, M, R);
// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
// return pblendv(r, r+r, a);
R = DAG.getNode(ISD::VSELECT, dl, VT, Op,
- R, DAG.getNode(ISD::ADD, dl, VT, R, R));
+ DAG.getNode(ISD::ADD, dl, VT, R, R), R);
return R;
}
@@ -11194,6 +10901,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::ANDNP: return "X86ISD::ANDNP";
case X86ISD::PSIGN: return "X86ISD::PSIGN";
case X86ISD::BLENDV: return "X86ISD::BLENDV";
+ case X86ISD::HADD: return "X86ISD::HADD";
+ case X86ISD::HSUB: return "X86ISD::HSUB";
case X86ISD::FHADD: return "X86ISD::FHADD";
case X86ISD::FHSUB: return "X86ISD::FHSUB";
case X86ISD::FMAX: return "X86ISD::FMAX";
@@ -11266,24 +10975,11 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::MOVSLDUP_LD: return "X86ISD::MOVSLDUP_LD";
case X86ISD::MOVSD: return "X86ISD::MOVSD";
case X86ISD::MOVSS: return "X86ISD::MOVSS";
- case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS";
- case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD";
- case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS";
- case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD";
- case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW";
- case X86ISD::PUNPCKLWD: return "X86ISD::PUNPCKLWD";
- case X86ISD::PUNPCKLDQ: return "X86ISD::PUNPCKLDQ";
- case X86ISD::PUNPCKLQDQ: return "X86ISD::PUNPCKLQDQ";
- case X86ISD::PUNPCKHBW: return "X86ISD::PUNPCKHBW";
- case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD";
- case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ";
- case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ";
+ case X86ISD::UNPCKL: return "X86ISD::UNPCKL";
+ case X86ISD::UNPCKH: return "X86ISD::UNPCKH";
case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
- case X86ISD::VPERMILPS: return "X86ISD::VPERMILPS";
- case X86ISD::VPERMILPSY: return "X86ISD::VPERMILPSY";
- case X86ISD::VPERMILPD: return "X86ISD::VPERMILPD";
- case X86ISD::VPERMILPDY: return "X86ISD::VPERMILPDY";
- case X86ISD::VPERM2F128: return "X86ISD::VPERM2F128";
+ case X86ISD::VPERMILP: return "X86ISD::VPERMILP";
+ case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
@@ -11391,7 +11087,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
EVT VT) const {
// Very little shuffling can be done for 64-bit vectors right now.
if (VT.getSizeInBits() == 64)
- return isPALIGNRMask(M, VT, Subtarget->hasSSSE3orAVX());
+ return false;
// FIXME: pshufb, blends, shifts.
return (VT.getVectorNumElements() == 2 ||
@@ -11419,7 +11115,7 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
return (isMOVLMask(Mask, VT) ||
isCommutedMOVLMask(Mask, VT, true) ||
isSHUFPMask(Mask, VT) ||
- isCommutedSHUFPMask(Mask, VT));
+ isSHUFPMask(Mask, VT, /* Commuted */ true));
}
return false;
}
@@ -12289,7 +11985,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
MachineFunction *MF = BB->getParent();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- assert(EnableSegmentedStacks);
+ assert(getTargetMachine().Options.EnableSegmentedStacks);
unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
unsigned TlsOffset = Is64Bit ? 0x70 : 0x30;
@@ -13169,7 +12865,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
std::swap(LHS, RHS);
@@ -13179,7 +12875,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
case ISD::SETOLE:
// Converting this to a min would handle comparisons between positive
// and negative zero incorrectly.
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
break;
Opcode = X86ISD::FMIN;
@@ -13197,7 +12893,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
case ISD::SETOGE:
// Converting this to a max would handle comparisons between positive
// and negative zero incorrectly.
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
break;
Opcode = X86ISD::FMAX;
@@ -13207,7 +12903,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
std::swap(LHS, RHS);
@@ -13233,7 +12929,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Converting this to a min would handle comparisons between positive
// and negative zero incorrectly, and swapping the operands would
// cause it to handle NaNs incorrectly.
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) {
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
@@ -13243,7 +12939,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
break;
case ISD::SETUGT:
// Converting this to a min would handle NaNs incorrectly.
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
(!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
break;
Opcode = X86ISD::FMIN;
@@ -13268,7 +12964,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Converting this to a max would handle comparisons between positive
// and negative zero incorrectly, and swapping the operands would
// cause it to handle NaNs incorrectly.
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
@@ -14048,7 +13744,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
X = DAG.getNode(ISD::BITCAST, DL, BlendVT, X);
Y = DAG.getNode(ISD::BITCAST, DL, BlendVT, Y);
Mask = DAG.getNode(ISD::BITCAST, DL, BlendVT, Mask);
- Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, X, Y);
+ Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, Y, X);
return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
}
}
@@ -14232,7 +13928,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
SDValue StoredVal = St->getOperand(1);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- // If we are saving a concatination of two XMM registers, perform two stores.
+ // If we are saving a concatenation of two XMM registers, perform two stores.
// This is better in Sandy Bridge cause one 256-bit mem op is done via two
// 128-bit ones. If in the future the cost becomes only one memory access the
// first version would be better.
@@ -14342,7 +14038,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
const Function *F = DAG.getMachineFunction().getFunction();
bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
- bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps
+ bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps
&& Subtarget->hasXMMInt();
if ((VT.isVector() ||
(VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
@@ -14458,7 +14154,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
/// set to A, RHS to B, and the routine returns 'true'.
/// Note that the binary operation should have the property that if one of the
/// operands is UNDEF then the result is UNDEF.
-static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
+static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
// Look for the following pattern: if
// A = < float a0, float a1, float a2, float a3 >
// B = < float b0, float b1, float b2, float b3 >
@@ -14474,7 +14170,18 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
return false;
EVT VT = LHS.getValueType();
- unsigned N = VT.getVectorNumElements();
+
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Unsupported vector type for horizontal add/sub");
+
+ // Handle 128 and 256-bit vector lengths. AVX defines horizontal add/sub to
+ // operate independently on 128-bit lanes.
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+ assert((NumLaneElts % 2 == 0) &&
+ "Vector type should have an even number of elements in each lane");
+ unsigned HalfLaneElts = NumLaneElts/2;
// View LHS in the form
// LHS = VECTOR_SHUFFLE A, B, LMask
@@ -14483,7 +14190,7 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
// NOTE: in what follows a default initialized SDValue represents an UNDEF of
// type VT.
SDValue A, B;
- SmallVector<int, 8> LMask(N);
+ SmallVector<int, 16> LMask(NumElts);
if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
if (LHS.getOperand(0).getOpcode() != ISD::UNDEF)
A = LHS.getOperand(0);
@@ -14493,14 +14200,14 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
} else {
if (LHS.getOpcode() != ISD::UNDEF)
A = LHS;
- for (unsigned i = 0; i != N; ++i)
+ for (unsigned i = 0; i != NumElts; ++i)
LMask[i] = i;
}
// Likewise, view RHS in the form
// RHS = VECTOR_SHUFFLE C, D, RMask
SDValue C, D;
- SmallVector<int, 8> RMask(N);
+ SmallVector<int, 16> RMask(NumElts);
if (RHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
if (RHS.getOperand(0).getOpcode() != ISD::UNDEF)
C = RHS.getOperand(0);
@@ -14510,7 +14217,7 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
} else {
if (RHS.getOpcode() != ISD::UNDEF)
C = RHS;
- for (unsigned i = 0; i != N; ++i)
+ for (unsigned i = 0; i != NumElts; ++i)
RMask[i] = i;
}
@@ -14525,30 +14232,28 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
// If A and B occur in reverse order in RHS, then "swap" them (which means
// rewriting the mask).
if (A != C)
- for (unsigned i = 0; i != N; ++i) {
- unsigned Idx = RMask[i];
- if (Idx < N)
- RMask[i] += N;
- else if (Idx < 2*N)
- RMask[i] -= N;
- }
+ CommuteVectorShuffleMask(RMask, NumElts);
// At this point LHS and RHS are equivalent to
// LHS = VECTOR_SHUFFLE A, B, LMask
// RHS = VECTOR_SHUFFLE A, B, RMask
// Check that the masks correspond to performing a horizontal operation.
- for (unsigned i = 0; i != N; ++i) {
- unsigned LIdx = LMask[i], RIdx = RMask[i];
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int LIdx = LMask[i], RIdx = RMask[i];
// Ignore any UNDEF components.
- if (LIdx >= 2*N || RIdx >= 2*N || (!A.getNode() && (LIdx < N || RIdx < N))
- || (!B.getNode() && (LIdx >= N || RIdx >= N)))
+ if (LIdx < 0 || RIdx < 0 ||
+ (!A.getNode() && (LIdx < (int)NumElts || RIdx < (int)NumElts)) ||
+ (!B.getNode() && (LIdx >= (int)NumElts || RIdx >= (int)NumElts)))
continue;
// Check that successive elements are being operated on. If not, this is
// not a horizontal operation.
- if (!(LIdx == 2*i && RIdx == 2*i + 1) &&
- !(isCommutative && LIdx == 2*i + 1 && RIdx == 2*i))
+ unsigned Src = (i/HalfLaneElts) % 2; // each lane is split between srcs
+ unsigned LaneStart = (i/NumLaneElts) * NumLaneElts;
+ int Index = 2*(i%HalfLaneElts) + NumElts*Src + LaneStart;
+ if (!(LIdx == Index && RIdx == Index + 1) &&
+ !(IsCommutative && LIdx == Index + 1 && RIdx == Index))
return false;
}
@@ -14565,7 +14270,8 @@ static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
SDValue RHS = N->getOperand(1);
// Try to synthesize horizontal adds from adds of shuffles.
- if (Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64) &&
+ if (((Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
+ (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
isHorizontalBinOp(LHS, RHS, true))
return DAG.getNode(X86ISD::FHADD, N->getDebugLoc(), VT, LHS, RHS);
return SDValue();
@@ -14579,7 +14285,8 @@ static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG,
SDValue RHS = N->getOperand(1);
// Try to synthesize horizontal subs from subs of shuffles.
- if (Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64) &&
+ if (((Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
+ (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
isHorizontalBinOp(LHS, RHS, false))
return DAG.getNode(X86ISD::FHSUB, N->getDebugLoc(), VT, LHS, RHS);
return SDValue();
@@ -14783,7 +14490,8 @@ static SDValue PerformAddCombine(SDNode *N, SelectionDAG &DAG,
SDValue Op1 = N->getOperand(1);
// Try to synthesize horizontal adds from adds of shuffles.
- if ((Subtarget->hasSSSE3orAVX()) && (VT == MVT::v8i16 || VT == MVT::v4i32) &&
+ if (((Subtarget->hasSSSE3orAVX() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
+ (Subtarget->hasAVX2() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
isHorizontalBinOp(Op0, Op1, true))
return DAG.getNode(X86ISD::HADD, N->getDebugLoc(), VT, Op0, Op1);
@@ -14815,8 +14523,9 @@ static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG,
// Try to synthesize horizontal adds from adds of shuffles.
EVT VT = N->getValueType(0);
- if ((Subtarget->hasSSSE3orAVX()) && (VT == MVT::v8i16 || VT == MVT::v4i32) &&
- isHorizontalBinOp(Op0, Op1, false))
+ if (((Subtarget->hasSSSE3orAVX() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
+ (Subtarget->hasAVX2() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
+ isHorizontalBinOp(Op0, Op1, false))
return DAG.getNode(X86ISD::HSUB, N->getDebugLoc(), VT, Op0, Op1);
return OptimizeConditionalInDecrement(N, DAG);
@@ -14857,18 +14566,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::SHUFPS: // Handle all target specific shuffles
case X86ISD::SHUFPD:
case X86ISD::PALIGN:
- case X86ISD::PUNPCKHBW:
- case X86ISD::PUNPCKHWD:
- case X86ISD::PUNPCKHDQ:
- case X86ISD::PUNPCKHQDQ:
- case X86ISD::UNPCKHPS:
- case X86ISD::UNPCKHPD:
- case X86ISD::PUNPCKLBW:
- case X86ISD::PUNPCKLWD:
- case X86ISD::PUNPCKLDQ:
- case X86ISD::PUNPCKLQDQ:
- case X86ISD::UNPCKLPS:
- case X86ISD::UNPCKLPD:
+ case X86ISD::UNPCKH:
+ case X86ISD::UNPCKL:
case X86ISD::MOVHLPS:
case X86ISD::MOVLHPS:
case X86ISD::PSHUFD:
@@ -14876,11 +14575,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PSHUFLW:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
- case X86ISD::VPERMILPS:
- case X86ISD::VPERMILPSY:
- case X86ISD::VPERMILPD:
- case X86ISD::VPERMILPDY:
- case X86ISD::VPERM2F128:
+ case X86ISD::VPERMILP:
+ case X86ISD::VPERM2X128:
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
}
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index ccff3a5..cfc1f88 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -273,23 +273,10 @@ namespace llvm {
MOVLPD,
MOVSD,
MOVSS,
- UNPCKLPS,
- UNPCKLPD,
- UNPCKHPS,
- UNPCKHPD,
- PUNPCKLBW,
- PUNPCKLWD,
- PUNPCKLDQ,
- PUNPCKLQDQ,
- PUNPCKHBW,
- PUNPCKHWD,
- PUNPCKHDQ,
- PUNPCKHQDQ,
- VPERMILPS,
- VPERMILPSY,
- VPERMILPD,
- VPERMILPDY,
- VPERM2F128,
+ UNPCKL,
+ UNPCKH,
+ VPERMILP,
+ VPERM2X128,
VBROADCAST,
// VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
@@ -468,10 +455,6 @@ namespace llvm {
/// the specified VECTOR_SHUFFLE mask with PSHUFLW instruction.
unsigned getShufflePSHUFLWImmediate(SDNode *N);
- /// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
- /// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
- unsigned getShufflePALIGNRImmediate(SDNode *N);
-
/// getExtractVEXTRACTF128Immediate - Return the appropriate
/// immediate to extract the specified EXTRACT_SUBVECTOR index
/// with VEXTRACTF128 instructions.
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
index d868773..f443088 100644
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -58,3 +58,391 @@ let isAsmParserOnly = 1 in {
defm VFNMSUBPS : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps">;
defm VFNMSUBPD : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd">, VEX_W;
}
+
+//===----------------------------------------------------------------------===//
+// FMA4 - AMD 4 operand Fused Multiply-Add instructions
+//===----------------------------------------------------------------------===//
+
+
+multiclass fma4s<bits<8> opc, string OpcodeStr> {
+ def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, XOP_W;
+ def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, XOP_W;
+ def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>;
+
+}
+
+multiclass fma4p<bits<8> opc, string OpcodeStr> {
+ def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, XOP_W;
+ def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, XOP_W;
+ def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>;
+ def rrY : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, XOP_W;
+ def rmY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, f256mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, XOP_W;
+ def mrY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VFMADDSS4 : fma4s<0x6A, "vfmaddss">;
+ defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd">;
+ defm VFMADDPS4 : fma4p<0x68, "vfmaddps">;
+ defm VFMADDPD4 : fma4p<0x69, "vfmaddpd">;
+ defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss">;
+ defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd">;
+ defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps">;
+ defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd">;
+ defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss">;
+ defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd">;
+ defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps">;
+ defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd">;
+ defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss">;
+ defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd">;
+ defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps">;
+ defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd">;
+ defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps">;
+ defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd">;
+ defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps">;
+ defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd">;
+}
+
+// FMA4 Intrinsics patterns
+
+// VFMADD
+def : Pat<(int_x86_fma4_vfmadd_ss VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMADDSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_ss VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFMADDSS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_ss VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFMADDSS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMADDSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFMADDSD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFMADDSD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmadd_ps VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMADDPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_ps VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFMADDPS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_ps VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFMADDPS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmadd_pd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMADDPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_pd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFMADDPD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_pd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFMADDPD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmadd_ps_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFMADDPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_ps_256 VR256:$src1, VR256:$src2,
+ (alignedloadv8f32 addr:$src3)),
+ (VFMADDPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_ps_256 VR256:$src1,
+ (alignedloadv8f32 addr:$src2),
+ VR256:$src3),
+ (VFMADDPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmadd_pd_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFMADDPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_pd_256 VR256:$src1, VR256:$src2,
+ (alignedloadv4f64 addr:$src3)),
+ (VFMADDPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_pd_256 VR256:$src1,
+ (alignedloadv4f64 addr:$src2),
+ VR256:$src3),
+ (VFMADDPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+// VFMSUB
+def : Pat<(int_x86_fma4_vfmsub_ss VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMSUBSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_ss VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFMSUBSS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_ss VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFMSUBSS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmsub_sd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMSUBSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_sd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFMSUBSD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_sd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFMSUBSD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmsub_ps VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMSUBPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_ps VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFMSUBPS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_ps VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFMSUBPS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmsub_pd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMSUBPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_pd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFMSUBPD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_pd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFMSUBPD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmsub_ps_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFMSUBPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_ps_256 VR256:$src1, VR256:$src2,
+ (alignedloadv8f32 addr:$src3)),
+ (VFMSUBPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_ps_256 VR256:$src1,
+ (alignedloadv8f32 addr:$src2),
+ VR256:$src3),
+ (VFMSUBPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmsub_pd_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFMSUBPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_pd_256 VR256:$src1, VR256:$src2,
+ (alignedloadv4f64 addr:$src3)),
+ (VFMSUBPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_pd_256 VR256:$src1,
+ (alignedloadv4f64 addr:$src2),
+ VR256:$src3),
+ (VFMSUBPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+// VFNMADD
+def : Pat<(int_x86_fma4_vfnmadd_ss VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFNMADDSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_ss VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFNMADDSS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_ss VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFNMADDSS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmadd_sd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFNMADDSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_sd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFNMADDSD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_sd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFNMADDSD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmadd_ps VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFNMADDPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_ps VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFNMADDPS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_ps VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFNMADDPS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmadd_pd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFNMADDPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_pd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFNMADDPD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_pd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFNMADDPD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmadd_ps_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFNMADDPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_ps_256 VR256:$src1, VR256:$src2,
+ (alignedloadv8f32 addr:$src3)),
+ (VFNMADDPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_ps_256 VR256:$src1,
+ (alignedloadv8f32 addr:$src2),
+ VR256:$src3),
+ (VFNMADDPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmadd_pd_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFNMADDPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_pd_256 VR256:$src1, VR256:$src2,
+ (alignedloadv4f64 addr:$src3)),
+ (VFNMADDPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_pd_256 VR256:$src1,
+ (alignedloadv4f64 addr:$src2),
+ VR256:$src3),
+ (VFNMADDPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+// VFNMSUB
+def : Pat<(int_x86_fma4_vfnmsub_ss VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFNMSUBSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_ss VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFNMSUBSS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_ss VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFNMSUBSS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmsub_sd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFNMSUBSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_sd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFNMSUBSD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_sd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFNMSUBSD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmsub_ps VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFNMSUBPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_ps VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFNMSUBPS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_ps VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFNMSUBPS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmsub_pd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFNMSUBPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_pd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFNMSUBPD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_pd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFNMSUBPD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmsub_ps_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFNMSUBPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_ps_256 VR256:$src1, VR256:$src2,
+ (alignedloadv8f32 addr:$src3)),
+ (VFNMSUBPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_ps_256 VR256:$src1,
+ (alignedloadv8f32 addr:$src2),
+ VR256:$src3),
+ (VFNMSUBPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmsub_pd_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFNMSUBPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_pd_256 VR256:$src1, VR256:$src2,
+ (alignedloadv4f64 addr:$src3)),
+ (VFNMSUBPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_pd_256 VR256:$src1,
+ (alignedloadv4f64 addr:$src2),
+ VR256:$src3),
+ (VFNMSUBPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+// VFMADDSUB
+def : Pat<(int_x86_fma4_vfmaddsub_ps VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMADDSUBPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmaddsub_ps VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFMADDSUBPS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmaddsub_ps VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFMADDSUBPS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmaddsub_pd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMADDSUBPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmaddsub_pd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFMADDSUBPD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmaddsub_pd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFMADDSUBPD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmaddsub_ps_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFMADDSUBPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfmaddsub_ps_256 VR256:$src1, VR256:$src2,
+ (alignedloadv8f32 addr:$src3)),
+ (VFMADDSUBPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmaddsub_ps_256 VR256:$src1,
+ (alignedloadv8f32 addr:$src2),
+ VR256:$src3),
+ (VFMADDSUBPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmaddsub_pd_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFMADDSUBPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfmaddsub_pd_256 VR256:$src1, VR256:$src2,
+ (alignedloadv4f64 addr:$src3)),
+ (VFMADDSUBPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmaddsub_pd_256 VR256:$src1,
+ (alignedloadv4f64 addr:$src2),
+ VR256:$src3),
+ (VFMADDSUBPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+// VFMSUBADD
+def : Pat<(int_x86_fma4_vfmsubadd_ps VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMSUBADDPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmsubadd_ps VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFMSUBADDPS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsubadd_ps VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFMSUBADDPS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmsubadd_pd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMSUBADDPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmsubadd_pd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFMSUBADDPD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsubadd_pd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFMSUBADDPD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmsubadd_ps_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFMSUBADDPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfmsubadd_ps_256 VR256:$src1, VR256:$src2,
+ (alignedloadv8f32 addr:$src3)),
+ (VFMSUBADDPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsubadd_ps_256 VR256:$src1,
+ (alignedloadv8f32 addr:$src2),
+ VR256:$src3),
+ (VFMSUBADDPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmsubadd_pd_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFMSUBADDPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfmsubadd_pd_256 VR256:$src1, VR256:$src2,
+ (alignedloadv4f64 addr:$src3)),
+ (VFMSUBADDPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsubadd_pd_256 VR256:$src1,
+ (alignedloadv4f64 addr:$src2),
+ VR256:$src3),
+ (VFMSUBADDPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index ecd6a93..7ba3639 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -110,6 +110,8 @@ class A7 { bits<5> Prefix = 16; }
class T8XD { bits<5> Prefix = 17; }
class T8XS { bits<5> Prefix = 18; }
class TAXD { bits<5> Prefix = 19; }
+class XOP8 { bits<5> Prefix = 20; }
+class XOP9 { bits<5> Prefix = 21; }
class VEX { bit hasVEXPrefix = 1; }
class VEX_W { bit hasVEX_WPrefix = 1; }
class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
@@ -118,7 +120,8 @@ class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
class VEX_L { bit hasVEX_L = 1; }
class VEX_LIG { bit ignoresVEX_L = 1; }
class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; }
-
+class XOP_W { bit hasXOP_WPrefix = 1; }
+class XOP { bit hasXOP_Prefix = 1; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
string AsmStr, Domain d = GenericDomain>
: Instruction {
@@ -158,6 +161,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit hasVEX_L = 0; // Does this inst use large (256-bit) registers?
bit ignoresVEX_L = 0; // Does this instruction ignore the L-bit
bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding?
+ bit hasXOP_WPrefix = 0; // Same bit as VEX_W, but used for swapping operands
+ bit hasXOP_Prefix = 0; // Does this inst require an XOP prefix?
// TSFlags layout should be kept in sync with X86InstrInfo.h.
let TSFlags{5-0} = FormBits;
@@ -179,6 +184,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{38} = hasVEX_L;
let TSFlags{39} = ignoresVEX_L;
let TSFlags{40} = has3DNow0F0FOpcode;
+ let TSFlags{41} = hasXOP_WPrefix;
+ let TSFlags{42} = hasXOP_Prefix;
}
class PseudoI<dag oops, dag iops, list<dag> pattern>
@@ -332,6 +339,10 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, TB,
Requires<[HasAVX]>;
+class VoPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
+ Requires<[HasXMM]>;
// SSE2 Instruction Templates:
//
@@ -496,6 +507,30 @@ class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
OpSize, VEX_4V, Requires<[HasFMA3]>;
+// FMA4 Instruction Templates
+class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ OpSize, VEX_4V, VEX_I8IMM, Requires<[HasFMA4]>;
+
+// XOP 2, 3 and 4 Operand Instruction Template
+class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>,
+ XOP, XOP9, Requires<[HasXOP]>;
+
+// XOP 2, 3 and 4 Operand Instruction Templates with imm byte
+class IXOPi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>,
+ XOP, XOP8, Requires<[HasXOP]>;
+
+// XOP 5 operand instruction (VEX encoding!)
+class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ OpSize, VEX_4V, VEX_I8IMM, Requires<[HasXOP]>;
+
// X86-64 Instruction templates...
//
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 791bbe6..cd13bc4 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -130,28 +130,12 @@ def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>;
def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
-def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
-def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
+def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>;
+def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
-def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
-def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
+def X86VPermilp : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>;
-def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>;
-def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>;
-def X86Punpckldq : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>;
-def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>;
-
-def X86Punpckhbw : SDNode<"X86ISD::PUNPCKHBW", SDTShuff2Op>;
-def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>;
-def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>;
-def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>;
-
-def X86VPermilps : SDNode<"X86ISD::VPERMILPS", SDTShuff2OpI>;
-def X86VPermilpsy : SDNode<"X86ISD::VPERMILPSY", SDTShuff2OpI>;
-def X86VPermilpd : SDNode<"X86ISD::VPERMILPD", SDTShuff2OpI>;
-def X86VPermilpdy : SDNode<"X86ISD::VPERMILPDY", SDTShuff2OpI>;
-
-def X86VPerm2f128 : SDNode<"X86ISD::VPERM2F128", SDTShuff3OpI>;
+def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
@@ -363,12 +347,6 @@ def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
}]>;
-// SHUFFLE_get_palign_imm xform function: convert vector_shuffle mask to
-// a PALIGNR imm.
-def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{
- return getI8Imm(X86::getShufflePALIGNRImmediate(N));
-}]>;
-
// EXTRACT_get_vextractf128_imm xform function: convert extract_subvector index
// to VEXTRACTF128 imm.
def EXTRACT_get_vextractf128_imm : SDNodeXForm<extract_subvector, [{
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 24c4a53..7d1b9a1 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -1528,9 +1528,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
// Build and insert into an implicit UNDEF value. This is OK because
// well be shifting and then extracting the lower 16-bits.
- BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
+ BuildMI(*MFI, &*MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF),leaInReg2);
InsMI2 =
- BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
+ BuildMI(*MFI, &*MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
.addReg(leaInReg2, RegState::Define, X86::sub_16bit)
.addReg(Src2, getKillRegState(isKill2));
addRegReg(MIB, leaInReg, true, leaInReg2, true);
@@ -2040,13 +2040,12 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
}
bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isTerminator()) return false;
+ if (!MI->isTerminator()) return false;
// Conditional branch is a special case.
- if (MCID.isBranch() && !MCID.isBarrier())
+ if (MI->isBranch() && !MI->isBarrier())
return true;
- if (!MCID.isPredicable())
+ if (!MI->isPredicable())
return true;
return !isPredicated(MI);
}
@@ -2072,7 +2071,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// A terminator that isn't a branch can't easily be handled by this
// analysis.
- if (!I->getDesc().isBranch())
+ if (!I->isBranch())
return true;
// Handle unconditional branches.
@@ -2556,6 +2555,8 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
switch (MI->getOpcode()) {
case X86::V_SET0:
+ case X86::FsFLD0SS:
+ case X86::FsFLD0SD:
return Expand2AddrUndef(MI, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
case X86::TEST8ri_NOREX:
MI->setDesc(get(X86::TEST8ri));
@@ -2771,7 +2772,9 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
case X86::RCPSSr:
case X86::RCPSSr_Int:
case X86::ROUNDSDr:
+ case X86::ROUNDSDr_Int:
case X86::ROUNDSSr:
+ case X86::ROUNDSSr_Int:
case X86::RSQRTSSr:
case X86::RSQRTSSr_Int:
case X86::SQRTSSr:
@@ -2783,7 +2786,9 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
case X86::Int_VCVTSS2SDrr:
case X86::VRCPSSr:
case X86::VROUNDSDr:
+ case X86::VROUNDSDr_Int:
case X86::VROUNDSSr:
+ case X86::VROUNDSSr_Int:
case X86::VRSQRTSSr:
case X86::VSQRTSSr:
return true;
@@ -2911,11 +2916,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
Alignment = 16;
break;
case X86::FsFLD0SD:
- case X86::VFsFLD0SD:
Alignment = 8;
break;
case X86::FsFLD0SS:
- case X86::VFsFLD0SS:
Alignment = 4;
break;
default:
@@ -2950,9 +2953,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
case X86::AVX_SETALLONES:
case X86::AVX2_SETALLONES:
case X86::FsFLD0SD:
- case X86::FsFLD0SS:
- case X86::VFsFLD0SD:
- case X86::VFsFLD0SS: {
+ case X86::FsFLD0SS: {
// Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
// Create a constant-pool entry and operands to load from it.
@@ -2978,9 +2979,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineConstantPool &MCP = *MF.getConstantPool();
Type *Ty;
unsigned Opc = LoadMI->getOpcode();
- if (Opc == X86::FsFLD0SS || Opc == X86::VFsFLD0SS)
+ if (Opc == X86::FsFLD0SS)
Ty = Type::getFloatTy(MF.getFunction()->getContext());
- else if (Opc == X86::FsFLD0SD || Opc == X86::VFsFLD0SD)
+ else if (Opc == X86::FsFLD0SD)
Ty = Type::getDoubleTy(MF.getFunction()->getContext());
else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
@@ -3569,7 +3570,13 @@ static const unsigned ReplaceableInstrsAVX2[][3] = {
{ X86::VORPSYrm, X86::VORPDYrm, X86::VPORYrm },
{ X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr },
{ X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm },
- { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr }
+ { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr },
+ { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
+ { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr },
+ { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm },
+ { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr },
+ { X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm },
+ { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr }
};
// FIXME: Some shuffle and unpack instructions have equivalents in different
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 35631d5..0bc3afa 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -473,6 +473,7 @@ def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
def HasAVX : Predicate<"Subtarget->hasAVX()">;
def HasAVX2 : Predicate<"Subtarget->hasAVX2()">;
+def HasXMM : Predicate<"Subtarget->hasXMM()">;
def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
@@ -480,6 +481,7 @@ def HasAES : Predicate<"Subtarget->hasAES()">;
def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">;
def HasFMA3 : Predicate<"Subtarget->hasFMA3()">;
def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
+def HasXOP : Predicate<"Subtarget->hasXOP()">;
def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">;
def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">;
def HasF16C : Predicate<"Subtarget->hasF16C()">;
@@ -1502,6 +1504,9 @@ include "X86InstrFragmentsSIMD.td"
// FMA - Fused Multiply-Add support (requires FMA)
include "X86InstrFMA.td"
+// XOP
+include "X86InstrXOP.td"
+
// SSE, MMX and 3DNow! vector support.
include "X86InstrSSE.td"
include "X86InstrMMX.td"
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 7cadac1..345f606 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -240,21 +240,13 @@ let Predicates = [HasAVX] in {
}
// Alias instructions that map fld0 to pxor for sse.
-// FIXME: Set encoding to pseudo!
-let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
- canFoldAsLoad = 1 in {
- def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
- [(set FR32:$dst, fp32imm0)]>,
- Requires<[HasSSE1]>, TB, OpSize;
- def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
- [(set FR64:$dst, fpimm0)]>,
- Requires<[HasSSE2]>, TB, OpSize;
- def VFsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
- [(set FR32:$dst, fp32imm0)]>,
- Requires<[HasAVX]>, TB, OpSize, VEX_4V;
- def VFsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
- [(set FR64:$dst, fpimm0)]>,
- Requires<[HasAVX]>, TB, OpSize, VEX_4V;
+// This is expanded by ExpandPostRAPseudos.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isPseudo = 1 in {
+ def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
+ [(set FR32:$dst, fp32imm0)]>, Requires<[HasXMM]>;
+ def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
+ [(set FR64:$dst, fpimm0)]>, Requires<[HasXMMInt]>;
}
//===----------------------------------------------------------------------===//
@@ -569,6 +561,16 @@ let Predicates = [HasAVX] in {
(EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
(VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
+
+ // Move low f32 and clear high bits.
+ def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSrr (v4f32 (V_SET0)),
+ (EXTRACT_SUBREG (v8f32 VR256:$src), sub_ss)), sub_xmm)>;
+ def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSrr (v4i32 (V_SET0)),
+ (EXTRACT_SUBREG (v8i32 VR256:$src), sub_ss)), sub_xmm)>;
}
let AddedComplexity = 20 in {
@@ -596,6 +598,9 @@ let Predicates = [HasAVX] in {
// Represent the same patterns above but in the form they appear for
// 256-bit types
+ def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
@@ -613,6 +618,15 @@ let Predicates = [HasAVX] in {
(SUBREG_TO_REG (i64 0),
(v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
sub_xmm)>;
+ def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))), (i32 0)))),
+ (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+
+ // Move low f64 and clear high bits.
+ def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDrr (v2f64 (V_SET0)),
+ (EXTRACT_SUBREG (v4f64 VR256:$src), sub_sd)), sub_xmm)>;
// Extract and store.
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
@@ -634,6 +648,16 @@ let Predicates = [HasAVX] in {
(VMOVSSrr (v4f32 VR128:$src1),
(EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+ // 256-bit variants
+ def : Pat<(v8i32 (X86Movsd VR256:$src1, VR256:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_ss),
+ (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_ss)), sub_xmm)>;
+ def : Pat<(v8f32 (X86Movsd VR256:$src1, VR256:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_ss),
+ (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_ss)), sub_xmm)>;
+
// Shuffle with VMOVSD
def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
(VMOVSDrr VR128:$src1, FR64:$src2)>;
@@ -650,6 +674,17 @@ let Predicates = [HasAVX] in {
(VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),
sub_sd))>;
+ // 256-bit variants
+ def : Pat<(v4i64 (X86Movsd VR256:$src1, VR256:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_sd),
+ (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_sd)), sub_xmm)>;
+ def : Pat<(v4f64 (X86Movsd VR256:$src1, VR256:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_sd),
+ (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_sd)), sub_xmm)>;
+
+
// FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
// is during lowering, where it's not possible to recognize the fold cause
// it has two uses through a bitcast. One use disappears at isel time and the
@@ -657,6 +692,9 @@ let Predicates = [HasAVX] in {
def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
(VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),
sub_sd))>;
+ def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),
+ sub_sd))>;
def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
(VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),
sub_sd))>;
@@ -761,6 +799,22 @@ let isCodeGenOnly = 1 in {
"movupd\t{$src, $dst|$dst, $src}", []>, VEX;
}
+let Predicates = [HasAVX] in {
+def : Pat<(v8i32 (X86vzmovl
+ (insert_subvector undef, (v4i32 VR128:$src), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
+def : Pat<(v4i64 (X86vzmovl
+ (insert_subvector undef, (v2i64 VR128:$src), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
+def : Pat<(v8f32 (X86vzmovl
+ (insert_subvector undef, (v4f32 VR128:$src), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
+def : Pat<(v4f64 (X86vzmovl
+ (insert_subvector undef, (v2f64 VR128:$src), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
+}
+
+
def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>;
def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
(VMOVUPSYmr addr:$dst, VR256:$src)>;
@@ -1156,14 +1210,17 @@ let Predicates = [HasAVX] in {
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
(VMOVHPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
+ (VMOVHPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Movlhps VR128:$src1,
(bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
(VMOVHPSrm VR128:$src1, addr:$src2)>;
- // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem
+ // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
// is during lowering, where it's not possible to recognize the load fold cause
// it has two uses through a bitcast. One use disappears at isel time and the
// fold opportunity reappears.
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))),
(VMOVHPDrm VR128:$src1, addr:$src2)>;
@@ -1174,10 +1231,10 @@ let Predicates = [HasAVX] in {
// Store patterns
def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst),
+ (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst),
(VMOVHPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))), addr:$dst),
+ (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst),
(VMOVHPDmr addr:$dst, VR128:$src)>;
}
@@ -1189,21 +1246,24 @@ let Predicates = [HasSSE1] in {
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
(MOVHPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Movlhps VR128:$src1,
(bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
(MOVHPSrm VR128:$src1, addr:$src2)>;
// Store patterns
def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst),
+ (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst),
(MOVHPSmr addr:$dst, VR128:$src)>;
}
let Predicates = [HasSSE2] in {
- // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem
+ // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
// is during lowering, where it's not possible to recognize the load fold cause
// it has two uses through a bitcast. One use disappears at isel time and the
// fold opportunity reappears.
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))),
(MOVHPDrm VR128:$src1, addr:$src2)>;
@@ -1214,7 +1274,7 @@ let Predicates = [HasSSE2] in {
// Store patterns
def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst),
+ (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))),addr:$dst),
(MOVHPDmr addr:$dst, VR128:$src)>;
}
@@ -1943,7 +2003,7 @@ def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
// whenever possible to avoid declaring two versions of each one.
def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
(VCVTDQ2PSYrr VR256:$src)>;
-def : Pat<(int_x86_avx_cvtdq2_ps_256 (memopv8i32 addr:$src)),
+def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))),
(VCVTDQ2PSYrm addr:$src)>;
def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src),
@@ -2430,27 +2490,27 @@ let AddedComplexity = 10 in {
} // AddedComplexity
let Predicates = [HasSSE1] in {
- def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+ def : Pat<(v4f32 (X86Unpckl VR128:$src1, (memopv4f32 addr:$src2))),
(UNPCKLPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+ def : Pat<(v4f32 (X86Unpckl VR128:$src1, VR128:$src2)),
(UNPCKLPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+ def : Pat<(v4f32 (X86Unpckh VR128:$src1, (memopv4f32 addr:$src2))),
(UNPCKHPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+ def : Pat<(v4f32 (X86Unpckh VR128:$src1, VR128:$src2)),
(UNPCKHPSrr VR128:$src1, VR128:$src2)>;
}
let Predicates = [HasSSE2] in {
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1, (memopv2f64 addr:$src2))),
(UNPCKLPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1, VR128:$src2)),
(UNPCKLPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+ def : Pat<(v2f64 (X86Unpckh VR128:$src1, (memopv2f64 addr:$src2))),
(UNPCKHPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+ def : Pat<(v2f64 (X86Unpckh VR128:$src1, VR128:$src2)),
(UNPCKHPDrr VR128:$src1, VR128:$src2)>;
- // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the
+ // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
// problem is during lowering, where it's not possible to recognize the load
// fold cause it has two uses through a bitcast. One use disappears at isel
// time and the fold opportunity reappears.
@@ -2463,59 +2523,43 @@ let Predicates = [HasSSE2] in {
}
let Predicates = [HasAVX] in {
- def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+ def : Pat<(v4f32 (X86Unpckl VR128:$src1, (memopv4f32 addr:$src2))),
(VUNPCKLPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+ def : Pat<(v4f32 (X86Unpckl VR128:$src1, VR128:$src2)),
(VUNPCKLPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+ def : Pat<(v4f32 (X86Unpckh VR128:$src1, (memopv4f32 addr:$src2))),
(VUNPCKHPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+ def : Pat<(v4f32 (X86Unpckh VR128:$src1, VR128:$src2)),
(VUNPCKHPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8f32 (X86Unpcklps VR256:$src1, (memopv8f32 addr:$src2))),
+ def : Pat<(v8f32 (X86Unpckl VR256:$src1, (memopv8f32 addr:$src2))),
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8f32 (X86Unpcklps VR256:$src1, VR256:$src2)),
- (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpcklps VR256:$src1, VR256:$src2)),
+ def : Pat<(v8f32 (X86Unpckl VR256:$src1, VR256:$src2)),
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpcklps VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
- (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8f32 (X86Unpckhps VR256:$src1, (memopv8f32 addr:$src2))),
- (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8f32 (X86Unpckhps VR256:$src1, VR256:$src2)),
- (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpckhps VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+ def : Pat<(v8f32 (X86Unpckh VR256:$src1, (memopv8f32 addr:$src2))),
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8i32 (X86Unpckhps VR256:$src1, VR256:$src2)),
+ def : Pat<(v8f32 (X86Unpckh VR256:$src1, VR256:$src2)),
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1, (memopv2f64 addr:$src2))),
(VUNPCKLPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1, VR128:$src2)),
(VUNPCKLPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+ def : Pat<(v2f64 (X86Unpckh VR128:$src1, (memopv2f64 addr:$src2))),
(VUNPCKHPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+ def : Pat<(v2f64 (X86Unpckh VR128:$src1, VR128:$src2)),
(VUNPCKHPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, (memopv4f64 addr:$src2))),
+ def : Pat<(v4f64 (X86Unpckl VR256:$src1, (memopv4f64 addr:$src2))),
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, VR256:$src2)),
+ def : Pat<(v4f64 (X86Unpckl VR256:$src1, VR256:$src2)),
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, (memopv4i64 addr:$src2))),
- (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, VR256:$src2)),
- (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, (memopv4f64 addr:$src2))),
- (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, VR256:$src2)),
- (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (X86Unpckhpd VR256:$src1, (memopv4i64 addr:$src2))),
+ def : Pat<(v4f64 (X86Unpckh VR256:$src1, (memopv4f64 addr:$src2))),
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4i64 (X86Unpckhpd VR256:$src1, VR256:$src2)),
+ def : Pat<(v4f64 (X86Unpckh VR256:$src1, VR256:$src2)),
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
- // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the
+ // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
// problem is during lowering, where it's not possible to recognize the load
// fold cause it has two uses through a bitcast. One use disappears at isel
// time and the fold opportunity reappears.
@@ -2869,7 +2913,7 @@ multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins ssmem:$src1, VR128:$src2),
+ (ins VR128:$src1, ssmem:$src2),
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
}
@@ -3198,13 +3242,13 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
//===----------------------------------------------------------------------===//
// Prefetch intrinsic.
-def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
+def PREFETCHT0 : VoPSI<0x18, MRM1m, (outs), (ins i8mem:$src),
"prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>;
-def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
+def PREFETCHT1 : VoPSI<0x18, MRM2m, (outs), (ins i8mem:$src),
"prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>;
-def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
+def PREFETCHT2 : VoPSI<0x18, MRM3m, (outs), (ins i8mem:$src),
"prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>;
-def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
+def PREFETCHNTA : VoPSI<0x18, MRM0m, (outs), (ins i8mem:$src),
"prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>;
// Flush cache
@@ -3652,6 +3696,8 @@ defm VPOR : PDI_binop_rm<0xEB, "vpor" , or, v2i64, VR128, memopv2i64,
i128mem, 1, 0>, VEX_4V;
defm VPXOR : PDI_binop_rm<0xEF, "vpxor", xor, v2i64, VR128, memopv2i64,
i128mem, 1, 0>, VEX_4V;
+defm VPANDN : PDI_binop_rm<0xDF, "vpandn", X86andnp, v2i64, VR128, memopv2i64,
+ i128mem, 0, 0>, VEX_4V;
let ExeDomain = SSEPackedInt in {
let neverHasSideEffects = 1 in {
@@ -3666,17 +3712,6 @@ let ExeDomain = SSEPackedInt in {
VEX_4V;
// PSRADQri doesn't exist in SSE[1-3].
}
- def VPANDNrr : PDI<0xDF, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst,
- (v2i64 (X86andnp VR128:$src1, VR128:$src2)))]>,VEX_4V;
-
- def VPANDNrm : PDI<0xDF, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (X86andnp VR128:$src1,
- (memopv2i64 addr:$src2)))]>, VEX_4V;
}
}
@@ -3714,6 +3749,8 @@ defm VPORY : PDI_binop_rm<0xEB, "vpor", or, v4i64, VR256, memopv4i64,
i256mem, 1, 0>, VEX_4V;
defm VPXORY : PDI_binop_rm<0xEF, "vpxor", xor, v4i64, VR256, memopv4i64,
i256mem, 1, 0>, VEX_4V;
+defm VPANDNY : PDI_binop_rm<0xDF, "vpandn", X86andnp, v4i64, VR256, memopv4i64,
+ i256mem, 0, 0>, VEX_4V;
let ExeDomain = SSEPackedInt in {
let neverHasSideEffects = 1 in {
@@ -3728,17 +3765,6 @@ let ExeDomain = SSEPackedInt in {
VEX_4V;
// PSRADQYri doesn't exist in SSE[1-3].
}
- def VPANDNYrr : PDI<0xDF, MRMSrcReg,
- (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
- "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR256:$dst,
- (v4i64 (X86andnp VR256:$src1, VR256:$src2)))]>,VEX_4V;
-
- def VPANDNYrm : PDI<0xDF, MRMSrcMem,
- (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
- "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR256:$dst, (X86andnp VR256:$src1,
- (memopv4i64 addr:$src2)))]>, VEX_4V;
}
}
@@ -3776,6 +3802,8 @@ defm POR : PDI_binop_rm<0xEB, "por" , or, v2i64, VR128, memopv2i64,
i128mem, 1>;
defm PXOR : PDI_binop_rm<0xEF, "pxor", xor, v2i64, VR128, memopv2i64,
i128mem, 1>;
+defm PANDN : PDI_binop_rm<0xDF, "pandn", X86andnp, v2i64, VR128, memopv2i64,
+ i128mem, 0>;
let ExeDomain = SSEPackedInt in {
let neverHasSideEffects = 1 in {
@@ -3787,14 +3815,6 @@ let ExeDomain = SSEPackedInt in {
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
"psrldq\t{$src2, $dst|$dst, $src2}", []>;
// PSRADQri doesn't exist in SSE[1-3].
- def PANDNrr : PDI<0xDF, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}", []>;
-
- let mayLoad = 1 in
- def PANDNrm : PDI<0xDF, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}", []>;
}
}
} // Constraints = "$src1 = $dst"
@@ -4198,66 +4218,88 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
}
let Predicates = [HasAVX] in {
- defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpcklbw,
+ defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl,
bc_v16i8, 0>, VEX_4V;
- defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Punpcklwd,
+ defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl,
bc_v8i16, 0>, VEX_4V;
- defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckldq,
+ defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl,
bc_v4i32, 0>, VEX_4V;
- defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Punpcklqdq,
+ defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl,
bc_v2i64, 0>, VEX_4V;
- defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckhbw,
+ defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh,
bc_v16i8, 0>, VEX_4V;
- defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Punpckhwd,
+ defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh,
bc_v8i16, 0>, VEX_4V;
- defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckhdq,
+ defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh,
bc_v4i32, 0>, VEX_4V;
- defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Punpckhqdq,
+ defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh,
bc_v2i64, 0>, VEX_4V;
}
let Predicates = [HasAVX2] in {
- defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpcklbw,
+ defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl,
bc_v32i8>, VEX_4V;
- defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpcklwd,
+ defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl,
bc_v16i16>, VEX_4V;
- defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckldq,
+ defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl,
bc_v8i32>, VEX_4V;
- defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Punpcklqdq,
+ defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl,
bc_v4i64>, VEX_4V;
- defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckhbw,
+ defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh,
bc_v32i8>, VEX_4V;
- defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckhwd,
+ defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh,
bc_v16i16>, VEX_4V;
- defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckhdq,
+ defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh,
bc_v8i32>, VEX_4V;
- defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Punpckhqdq,
+ defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh,
bc_v4i64>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
- defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw,
+ defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl,
bc_v16i8>;
- defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd,
+ defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl,
bc_v8i16>;
- defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq,
+ defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl,
bc_v4i32>;
- defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Punpcklqdq,
+ defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl,
bc_v2i64>;
- defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw,
+ defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh,
bc_v16i8>;
- defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd,
+ defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh,
bc_v8i16>;
- defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq,
+ defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh,
bc_v4i32>;
- defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Punpckhqdq,
+ defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh,
bc_v2i64>;
}
} // ExeDomain = SSEPackedInt
+// Patterns for using AVX1 instructions with integer vectors
+// Here to give AVX2 priority
+let Predicates = [HasAVX] in {
+ def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+ (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
+ (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+ (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
+ (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))),
+ (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
+ (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))),
+ (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
+ (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
+}
+
// Splat v2f64 / v2i64
let AddedComplexity = 10 in {
def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
@@ -4784,7 +4826,7 @@ def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
// AVX 256-bit register conversion intrinsics
def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src),
(VCVTDQ2PDYrr VR128:$src)>;
-def : Pat<(int_x86_avx_cvtdq2_pd_256 (memopv4i32 addr:$src)),
+def : Pat<(int_x86_avx_cvtdq2_pd_256 (bitconvert (memopv2i64 addr:$src))),
(VCVTDQ2PDYrm addr:$src)>;
def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src),
@@ -4794,7 +4836,7 @@ def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)),
def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
(VCVTDQ2PDYrr VR128:$src)>;
-def : Pat<(v4f64 (sint_to_fp (memopv4i32 addr:$src))),
+def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
(VCVTDQ2PDYrm addr:$src)>;
//===---------------------------------------------------------------------===//
@@ -5085,7 +5127,7 @@ let Constraints = "$src1 = $dst" in {
/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag128, Intrinsic IntId128> {
+ Intrinsic IntId128> {
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
@@ -5097,12 +5139,12 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(IntId128
- (bitconvert (mem_frag128 addr:$src))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src))))]>, OpSize;
}
/// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag256, Intrinsic IntId256> {
+ Intrinsic IntId256> {
def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
@@ -5114,32 +5156,32 @@ multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst,
(IntId256
- (bitconvert (mem_frag256 addr:$src))))]>, OpSize;
+ (bitconvert (memopv4i64 addr:$src))))]>, OpSize;
}
let Predicates = [HasAVX] in {
- defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8,
+ defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb",
int_x86_ssse3_pabs_b_128>, VEX;
- defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv8i16,
+ defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw",
int_x86_ssse3_pabs_w_128>, VEX;
- defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv4i32,
+ defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd",
int_x86_ssse3_pabs_d_128>, VEX;
}
let Predicates = [HasAVX2] in {
- defm VPABSB : SS3I_unop_rm_int_y<0x1C, "vpabsb", memopv32i8,
+ defm VPABSB : SS3I_unop_rm_int_y<0x1C, "vpabsb",
int_x86_avx2_pabs_b>, VEX;
- defm VPABSW : SS3I_unop_rm_int_y<0x1D, "vpabsw", memopv16i16,
+ defm VPABSW : SS3I_unop_rm_int_y<0x1D, "vpabsw",
int_x86_avx2_pabs_w>, VEX;
- defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd", memopv8i32,
+ defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd",
int_x86_avx2_pabs_d>, VEX;
}
-defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8,
+defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb",
int_x86_ssse3_pabs_b_128>;
-defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv8i16,
+defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw",
int_x86_ssse3_pabs_w_128>;
-defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32,
+defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd",
int_x86_ssse3_pabs_d_128>;
//===---------------------------------------------------------------------===//
@@ -5148,8 +5190,7 @@ defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32,
/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag128, Intrinsic IntId128,
- bit Is2Addr = 1> {
+ Intrinsic IntId128, bit Is2Addr = 1> {
let isCommutable = 1 in
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
@@ -5165,11 +5206,11 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(IntId128 VR128:$src1,
- (bitconvert (mem_frag128 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
}
multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag256, Intrinsic IntId256> {
+ Intrinsic IntId256> {
let isCommutable = 1 in
def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2),
@@ -5181,94 +5222,94 @@ multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(IntId256 VR256:$src1,
- (bitconvert (mem_frag256 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
}
let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
- defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16,
+ defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw",
int_x86_ssse3_phadd_w_128, 0>, VEX_4V;
- defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv4i32,
+ defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd",
int_x86_ssse3_phadd_d_128, 0>, VEX_4V;
- defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv8i16,
+ defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
int_x86_ssse3_phadd_sw_128, 0>, VEX_4V;
- defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv8i16,
+ defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw",
int_x86_ssse3_phsub_w_128, 0>, VEX_4V;
- defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv4i32,
+ defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd",
int_x86_ssse3_phsub_d_128, 0>, VEX_4V;
- defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv8i16,
+ defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
int_x86_ssse3_phsub_sw_128, 0>, VEX_4V;
- defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv16i8,
+ defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw",
int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V;
- defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv16i8,
+ defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb",
int_x86_ssse3_pshuf_b_128, 0>, VEX_4V;
- defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv16i8,
+ defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb",
int_x86_ssse3_psign_b_128, 0>, VEX_4V;
- defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv8i16,
+ defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw",
int_x86_ssse3_psign_w_128, 0>, VEX_4V;
- defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv4i32,
+ defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd",
int_x86_ssse3_psign_d_128, 0>, VEX_4V;
}
-defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16,
+defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw",
int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V;
}
let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
- defm VPHADDW : SS3I_binop_rm_int_y<0x01, "vphaddw", memopv16i16,
+ defm VPHADDW : SS3I_binop_rm_int_y<0x01, "vphaddw",
int_x86_avx2_phadd_w>, VEX_4V;
- defm VPHADDD : SS3I_binop_rm_int_y<0x02, "vphaddd", memopv8i32,
+ defm VPHADDD : SS3I_binop_rm_int_y<0x02, "vphaddd",
int_x86_avx2_phadd_d>, VEX_4V;
- defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw", memopv16i16,
+ defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
int_x86_avx2_phadd_sw>, VEX_4V;
- defm VPHSUBW : SS3I_binop_rm_int_y<0x05, "vphsubw", memopv16i16,
+ defm VPHSUBW : SS3I_binop_rm_int_y<0x05, "vphsubw",
int_x86_avx2_phsub_w>, VEX_4V;
- defm VPHSUBD : SS3I_binop_rm_int_y<0x06, "vphsubd", memopv8i32,
+ defm VPHSUBD : SS3I_binop_rm_int_y<0x06, "vphsubd",
int_x86_avx2_phsub_d>, VEX_4V;
- defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw", memopv16i16,
+ defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw",
int_x86_avx2_phsub_sw>, VEX_4V;
- defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw", memopv32i8,
+ defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw",
int_x86_avx2_pmadd_ub_sw>, VEX_4V;
- defm VPSHUFB : SS3I_binop_rm_int_y<0x00, "vpshufb", memopv32i8,
+ defm VPSHUFB : SS3I_binop_rm_int_y<0x00, "vpshufb",
int_x86_avx2_pshuf_b>, VEX_4V;
- defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", memopv32i8,
+ defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb",
int_x86_avx2_psign_b>, VEX_4V;
- defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", memopv16i16,
+ defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw",
int_x86_avx2_psign_w>, VEX_4V;
- defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", memopv8i32,
+ defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd",
int_x86_avx2_psign_d>, VEX_4V;
}
-defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw", memopv16i16,
+defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw",
int_x86_avx2_pmul_hr_sw>, VEX_4V;
}
// None of these have i8 immediate fields.
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
- defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv8i16,
+ defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw",
int_x86_ssse3_phadd_w_128>;
- defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv4i32,
+ defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd",
int_x86_ssse3_phadd_d_128>;
- defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv8i16,
+ defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw",
int_x86_ssse3_phadd_sw_128>;
- defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv8i16,
+ defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw",
int_x86_ssse3_phsub_w_128>;
- defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv4i32,
+ defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd",
int_x86_ssse3_phsub_d_128>;
- defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv8i16,
+ defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw",
int_x86_ssse3_phsub_sw_128>;
- defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv16i8,
+ defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw",
int_x86_ssse3_pmadd_ub_sw_128>;
- defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv16i8,
+ defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb",
int_x86_ssse3_pshuf_b_128>;
- defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv16i8,
+ defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb",
int_x86_ssse3_psign_b_128>;
- defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv8i16,
+ defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw",
int_x86_ssse3_psign_w_128>;
- defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv4i32,
+ defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd",
int_x86_ssse3_psign_d_128>;
}
-defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16,
+defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw",
int_x86_ssse3_pmul_hr_sw_128>;
}
@@ -6017,8 +6058,18 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
Intrinsic F32Int,
Intrinsic F64Int, bit Is2Addr = 1> {
let ExeDomain = GenericDomain in {
- // Intrinsic operation, reg.
+ // Operation, reg.
def SSr : SS4AIi8<opcss, MRMSrcReg,
+ (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ []>, OpSize;
+
+ // Intrinsic operation, reg.
+ def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
@@ -6040,8 +6091,18 @@ let ExeDomain = GenericDomain in {
(F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
OpSize;
- // Intrinsic operation, reg.
+ // Operation, reg.
def SDr : SS4AIi8<opcsd, MRMSrcReg,
+ (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ []>, OpSize;
+
+ // Intrinsic operation, reg.
+ def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
@@ -6079,6 +6140,27 @@ let Predicates = [HasAVX] in {
defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
int_x86_sse41_round_ss,
int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;
+
+ def : Pat<(ffloor FR32:$src),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
+ def : Pat<(f64 (ffloor FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
+ def : Pat<(f32 (fnearbyint FR32:$src)),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
+ def : Pat<(f64 (fnearbyint FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
+ def : Pat<(f32 (fceil FR32:$src)),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
+ def : Pat<(f64 (fceil FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
+ def : Pat<(f32 (frint FR32:$src)),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
+ def : Pat<(f64 (frint FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
+ def : Pat<(f32 (ftrunc FR32:$src)),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
+ def : Pat<(f64 (ftrunc FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
}
defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
@@ -6088,6 +6170,27 @@ let Constraints = "$src1 = $dst" in
defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
+def : Pat<(ffloor FR32:$src),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
+def : Pat<(f64 (ffloor FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
+def : Pat<(f32 (fnearbyint FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
+def : Pat<(f64 (fnearbyint FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
+def : Pat<(f32 (fceil FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
+def : Pat<(f64 (fceil FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
+def : Pat<(f32 (frint FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
+def : Pat<(f64 (frint FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
+def : Pat<(f32 (ftrunc FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
+def : Pat<(f64 (ftrunc FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
+
//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Bit Test
//===----------------------------------------------------------------------===//
@@ -6195,7 +6298,7 @@ multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(IntId128
- (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src))))]>, OpSize;
}
let Predicates = [HasAVX] in
@@ -6221,7 +6324,7 @@ multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
}
/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
@@ -6237,7 +6340,7 @@ multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(IntId256 VR256:$src1,
- (bitconvert (memopv32i8 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
}
let Predicates = [HasAVX] in {
@@ -6400,38 +6503,38 @@ let Predicates = [HasAVX] in {
let isCommutable = 0 in {
let ExeDomain = SSEPackedSingle in {
defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ VR128, memopv4f32, i128mem, 0>, VEX_4V;
defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
- int_x86_avx_blend_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ int_x86_avx_blend_ps_256, VR256, memopv8f32, i256mem, 0>, VEX_4V;
}
let ExeDomain = SSEPackedDouble in {
defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ VR128, memopv2f64, i128mem, 0>, VEX_4V;
defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
- int_x86_avx_blend_pd_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ int_x86_avx_blend_pd_256, VR256, memopv4f64, i256mem, 0>, VEX_4V;
}
defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ VR128, memopv2i64, i128mem, 0>, VEX_4V;
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ VR128, memopv2i64, i128mem, 0>, VEX_4V;
}
let ExeDomain = SSEPackedSingle in
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ VR128, memopv4f32, i128mem, 0>, VEX_4V;
let ExeDomain = SSEPackedDouble in
defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ VR128, memopv2f64, i128mem, 0>, VEX_4V;
let ExeDomain = SSEPackedSingle in
defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
- VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ VR256, memopv8f32, i256mem, 0>, VEX_4V;
}
let Predicates = [HasAVX2] in {
let isCommutable = 0 in {
defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw,
- VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ VR256, memopv4i64, i256mem, 0>, VEX_4V;
defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
- VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ VR256, memopv4i64, i256mem, 0>, VEX_4V;
}
}
@@ -6439,35 +6542,35 @@ let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
let ExeDomain = SSEPackedSingle in
defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv4f32, i128mem>;
let ExeDomain = SSEPackedDouble in
defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv2f64, i128mem>;
defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv2i64, i128mem>;
defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv2i64, i128mem>;
}
let ExeDomain = SSEPackedSingle in
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv4f32, i128mem>;
let ExeDomain = SSEPackedDouble in
defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv2f64, i128mem>;
}
/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators
multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
RegisterClass RC, X86MemOperand x86memop,
PatFrag mem_frag, Intrinsic IntId> {
- def rr : I<opc, MRMSrcReg, (outs RC:$dst),
+ def rr : Ii8<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
- def rm : I<opc, MRMSrcMem, (outs RC:$dst),
+ def rm : Ii8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
@@ -6480,23 +6583,23 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedDouble in {
defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem,
- memopv16i8, int_x86_sse41_blendvpd>;
+ memopv2f64, int_x86_sse41_blendvpd>;
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
- memopv32i8, int_x86_avx_blendv_pd_256>;
+ memopv4f64, int_x86_avx_blendv_pd_256>;
} // ExeDomain = SSEPackedDouble
let ExeDomain = SSEPackedSingle in {
defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem,
- memopv16i8, int_x86_sse41_blendvps>;
+ memopv4f32, int_x86_sse41_blendvps>;
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
- memopv32i8, int_x86_avx_blendv_ps_256>;
+ memopv8f32, int_x86_avx_blendv_ps_256>;
} // ExeDomain = SSEPackedSingle
defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
- memopv16i8, int_x86_sse41_pblendvb>;
+ memopv2i64, int_x86_sse41_pblendvb>;
}
let Predicates = [HasAVX2] in {
defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem,
- memopv32i8, int_x86_avx2_pblendvb>;
+ memopv4i64, int_x86_avx2_pblendvb>;
}
let Predicates = [HasAVX] in {
@@ -6537,7 +6640,8 @@ let Predicates = [HasAVX2] in {
/// SS41I_ternary_int - SSE 4.1 ternary operator
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
- multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
+ Intrinsic IntId> {
def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr,
@@ -6551,15 +6655,18 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
"\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst,
(IntId VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2)), XMM0))]>, OpSize;
+ (bitconvert (mem_frag addr:$src2)), XMM0))]>, OpSize;
}
}
let ExeDomain = SSEPackedDouble in
-defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
+defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memopv2f64,
+ int_x86_sse41_blendvpd>;
let ExeDomain = SSEPackedSingle in
-defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
-defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
+defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32,
+ int_x86_sse41_blendvps>;
+defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64,
+ int_x86_sse41_pblendvb>;
let Predicates = [HasSSE41] in {
def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1),
@@ -6614,8 +6721,7 @@ multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+ (IntId128 VR128:$src1, (memopv2i64 addr:$src2)))]>, OpSize;
}
/// SS42I_binop_rm_int - Simple SSE 4.2 binary operator
@@ -6630,8 +6736,7 @@ multiclass SS42I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
(ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
- (IntId256 VR256:$src1,
- (bitconvert (memopv32i8 addr:$src2))))]>, OpSize;
+ (IntId256 VR256:$src1, (memopv4i64 addr:$src2)))]>, OpSize;
}
let Predicates = [HasAVX] in {
@@ -6913,7 +7018,7 @@ multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
}
// Perform One Round of an AES Encryption/Decryption Flow
@@ -7144,7 +7249,7 @@ def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values
//
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR128:$src2, i8imm:$src3),
"vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
@@ -7163,35 +7268,10 @@ def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3),
(VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
-def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-
//===----------------------------------------------------------------------===//
// VEXTRACTF128 - Extract packed floating-point values
//
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
(ins VR256:$src1, i8imm:$src2),
"vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -7210,31 +7290,6 @@ def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
(VEXTRACTF128rr VR256:$src1, imm:$src2)>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v4f32 (VEXTRACTF128rr
- (v8f32 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v2f64 (VEXTRACTF128rr
- (v4f64 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v4i32 (VEXTRACTF128rr
- (v8i32 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v2i64 (VEXTRACTF128rr
- (v4i64 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v8i16 (VEXTRACTF128rr
- (v16i16 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v16i8 (VEXTRACTF128rr
- (v32i8 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-
//===----------------------------------------------------------------------===//
// VMASKMOV - Conditional SIMD Packed Loads and Stores
//
@@ -7288,7 +7343,8 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop_i:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (IntVar RC:$src1, (i_frag addr:$src2)))]>, VEX_4V;
+ [(set RC:$dst, (IntVar RC:$src1,
+ (bitconvert (i_frag addr:$src2))))]>, VEX_4V;
def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, i8imm:$src2),
@@ -7302,11 +7358,11 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
let ExeDomain = SSEPackedSingle in {
defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
- memopv4f32, memopv4i32,
+ memopv4f32, memopv2i64,
int_x86_avx_vpermilvar_ps,
int_x86_avx_vpermil_ps>;
defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
- memopv8f32, memopv8i32,
+ memopv8f32, memopv4i64,
int_x86_avx_vpermilvar_ps_256,
int_x86_avx_vpermil_ps_256>;
}
@@ -7321,19 +7377,28 @@ let ExeDomain = SSEPackedDouble in {
int_x86_avx_vpermil_pd_256>;
}
-def : Pat<(v8f32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))),
+def : Pat<(v8f32 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
(VPERMILPSYri VR256:$src1, imm:$imm)>;
-def : Pat<(v4f64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))),
+def : Pat<(v4f64 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
(VPERMILPDYri VR256:$src1, imm:$imm)>;
-def : Pat<(v8i32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))),
+def : Pat<(v8i32 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
(VPERMILPSYri VR256:$src1, imm:$imm)>;
-def : Pat<(v4i64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))),
+def : Pat<(v4i64 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
(VPERMILPDYri VR256:$src1, imm:$imm)>;
+def : Pat<(v8f32 (X86VPermilp (memopv8f32 addr:$src1), (i8 imm:$imm))),
+ (VPERMILPSYmi addr:$src1, imm:$imm)>;
+def : Pat<(v4f64 (X86VPermilp (memopv4f64 addr:$src1), (i8 imm:$imm))),
+ (VPERMILPDYmi addr:$src1, imm:$imm)>;
+def : Pat<(v8i32 (X86VPermilp (bc_v8i32 (memopv4i64 addr:$src1)),
+ (i8 imm:$imm))),
+ (VPERMILPSYmi addr:$src1, imm:$imm)>;
+def : Pat<(v4i64 (X86VPermilp (memopv4i64 addr:$src1), (i8 imm:$imm))),
+ (VPERMILPDYmi addr:$src1, imm:$imm)>;
//===----------------------------------------------------------------------===//
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
//
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
@@ -7359,22 +7424,9 @@ def : Pat<(int_x86_avx_vperm2f128_pd_256
VR256:$src1, (memopv4f64 addr:$src2), imm:$src3),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
def : Pat<(int_x86_avx_vperm2f128_si_256
- VR256:$src1, (memopv8i32 addr:$src2), imm:$src3),
+ VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)), imm:$src3),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
-def : Pat<(v8f32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v8i32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v4i64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v4f64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v32i8 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v16i16 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-
//===----------------------------------------------------------------------===//
// VZERO - Zero YMM registers
//
@@ -7451,9 +7503,9 @@ multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr,
let isCommutable = 0 in {
defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv2i64, i128mem>;
defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
- VR256, memopv32i8, i256mem>;
+ VR256, memopv4i64, i256mem>;
}
//===----------------------------------------------------------------------===//
@@ -7541,11 +7593,12 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
(ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (Int VR256:$src1, (mem_frag addr:$src2)))]>,
+ [(set VR256:$dst, (Int VR256:$src1,
+ (bitconvert (mem_frag addr:$src2))))]>,
VEX_4V;
}
-defm VPERMD : avx2_perm<0x36, "vpermd", memopv8i32, int_x86_avx2_permd>;
+defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, int_x86_avx2_permd>;
let ExeDomain = SSEPackedSingle in
defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, int_x86_avx2_permps>;
@@ -7571,7 +7624,7 @@ defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, int_x86_avx2_permpd>,
VEX_W;
//===----------------------------------------------------------------------===//
-// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
+// VPERM2I128 - Permute Integer Values in 128-bit chunks
//
def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
@@ -7587,6 +7640,64 @@ def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
imm:$src3))]>,
VEX_4V;
+let Predicates = [HasAVX2] in {
+def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+
+def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, (bc_v32i8 (memopv4i64 addr:$src2)),
+ (i8 imm:$imm))),
+ (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
+ (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)),
+ (i8 imm:$imm))),
+ (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2),
+ (i8 imm:$imm))),
+ (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
+}
+
+// AVX1 patterns
+def : Pat<(v8f32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+
+def : Pat<(v8f32 (X86VPerm2x128 VR256:$src1,
+ (memopv8f32 addr:$src2), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1,
+ (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1,
+ (memopv4i64 addr:$src2), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1,
+ (memopv4f64 addr:$src2), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1,
+ (bc_v32i8 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
+ (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+
+
//===----------------------------------------------------------------------===//
// VINSERTI128 - Insert packed integer values
//
@@ -7603,6 +7714,51 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
(int_x86_avx2_vinserti128 VR256:$src1, (memopv2i64 addr:$src2),
imm:$src3))]>, VEX_4V;
+let Predicates = [HasAVX2] in {
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTI128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTI128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
+ (i32 imm)),
+ (VINSERTI128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
+ (i32 imm)),
+ (VINSERTI128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+}
+
+// AVX1 patterns
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+
//===----------------------------------------------------------------------===//
// VEXTRACTI128 - Extract packed integer values
//
@@ -7617,6 +7773,51 @@ def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
(ins i128mem:$dst, VR256:$src1, i8imm:$src2),
"vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX;
+let Predicates = [HasAVX2] in {
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2i64 (VEXTRACTI128rr
+ (v4i64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4i32 (VEXTRACTI128rr
+ (v8i32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v8i16 (VEXTRACTI128rr
+ (v16i16 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v16i8 (VEXTRACTI128rr
+ (v32i8 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+}
+
+// AVX1 patterns
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4f32 (VEXTRACTF128rr
+ (v8f32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2f64 (VEXTRACTF128rr
+ (v4f64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2i64 (VEXTRACTF128rr
+ (v4i64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4i32 (VEXTRACTF128rr
+ (v8i32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v8i16 (VEXTRACTF128rr
+ (v16i16 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v16i8 (VEXTRACTF128rr
+ (v32i8 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+
//===----------------------------------------------------------------------===//
// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
//
diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td
new file mode 100644
index 0000000..64cc44d
--- /dev/null
+++ b/lib/Target/X86/X86InstrXOP.td
@@ -0,0 +1,243 @@
+//====- X86InstrXOP.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes XOP (eXtended OPerations)
+//
+//===----------------------------------------------------------------------===//
+
+multiclass xop2op<bits<8> opc, string OpcodeStr, X86MemOperand x86memop> { // Two-operand XOP forms with a VR128 destination; x86memop is the memory operand class used by the rm form.
+ def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), // register-source form
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>, VEX; // empty pattern list: no selection pattern is attached here
+ def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src), // memory-source form
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>, VEX;
+}
+
+let isAsmParserOnly = 1 in { // Instantiates xop2op (rr and rm forms) for the vphsub*, vphadd* and vfrcz* mnemonics; isAsmParserOnly is set on every record in the block.
+ defm VPHSUBWD : xop2op<0xE2, "vphsubwd", f128mem>;
+ defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", f128mem>;
+ defm VPHSUBBW : xop2op<0xE1, "vphsubbw", f128mem>;
+ defm VPHADDWQ : xop2op<0xC7, "vphaddwq", f128mem>;
+ defm VPHADDWD : xop2op<0xC6, "vphaddwd", f128mem>;
+ defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", f128mem>;
+ defm VPHADDUWD : xop2op<0xD6, "vphadduwd", f128mem>;
+ defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", f128mem>;
+ defm VPHADDUBW : xop2op<0xD1, "vphaddubw", f128mem>;
+ defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", f128mem>;
+ defm VPHADDUBD : xop2op<0xD2, "vphaddubd", f128mem>;
+ defm VPHADDDQ : xop2op<0xCB, "vphadddq", f128mem>;
+ defm VPHADDBW : xop2op<0xC1, "vphaddbw", f128mem>;
+ defm VPHADDBQ : xop2op<0xC3, "vphaddbq", f128mem>;
+ defm VPHADDBD : xop2op<0xC2, "vphaddbd", f128mem>;
+ defm VFRCZSS : xop2op<0x82, "vfrczss", f32mem>; // scalar forms use the narrower f32mem / f64mem memory operands
+ defm VFRCZSD : xop2op<0x83, "vfrczsd", f64mem>;
+ defm VFRCZPS : xop2op<0x80, "vfrczps", f128mem>;
+ defm VFRCZPD : xop2op<0x81, "vfrczpd", f128mem>;
+}
+
+multiclass xop2op256<bits<8> opc, string OpcodeStr> { // 256-bit (VR256) two-operand XOP forms: rrY takes a register source, rmY an f256mem source.
+ def rrY : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>, VEX, VEX_L; // empty pattern list: no selection pattern is attached here
+ def rmY : IXOP<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>, VEX, VEX_L; // VEX_L is required here as on rrY; without it this form has the same encoding attributes as the 128-bit rm form of the same opcode.
+}
+
+let isAsmParserOnly = 1 in { // 256-bit vfrczps / vfrczpd: same defm prefixes and opcodes (0x80, 0x81) as the 128-bit instances, with rrY / rmY suffixes.
+ defm VFRCZPS : xop2op256<0x80, "vfrczps">;
+ defm VFRCZPD : xop2op256<0x81, "vfrczpd">;
+}
+
+multiclass xop3op<bits<8> opc, string OpcodeStr> { // Three-operand XOP forms (VR128 destination, two sources) where either source may be the memory operand.
+ def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), // both sources in registers
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4VOp3;
+ def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), // second source in memory
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_W; // the only form here that uses VEX_4V + VEX_W instead of VEX_4VOp3
+ def mr : IXOP<opc, MRMSrcMem, (outs VR128:$dst), // first source in memory
+ (ins f128mem:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4VOp3;
+}
+
+let isAsmParserOnly = 1 in { // Instantiates xop3op (rr, rm, mr forms) for the vpshl*, vpsha* and vprot* mnemonics.
+ defm VPSHLW : xop3op<0x95, "vpshlw">;
+ defm VPSHLQ : xop3op<0x97, "vpshlq">;
+ defm VPSHLD : xop3op<0x96, "vpshld">;
+ defm VPSHLB : xop3op<0x94, "vpshlb">;
+ defm VPSHAW : xop3op<0x99, "vpshaw">;
+ defm VPSHAQ : xop3op<0x9B, "vpshaq">;
+ defm VPSHAD : xop3op<0x9A, "vpshad">;
+ defm VPSHAB : xop3op<0x98, "vpshab">;
+ defm VPROTW : xop3op<0x91, "vprotw">;
+ defm VPROTQ : xop3op<0x93, "vprotq">;
+ defm VPROTD : xop3op<0x92, "vprotd">;
+ defm VPROTB : xop3op<0x90, "vprotb">;
+}
+
+multiclass xop3opimm<bits<8> opc, string OpcodeStr> { // XOP forms taking one vector source plus an 8-bit immediate ($src2).
+ def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst), // register source + imm8
+ (ins VR128:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX;
+ def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst), // memory source + imm8
+ (ins f128mem:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX;
+}
+
+let isAsmParserOnly = 1 in { // Immediate forms of vprot*: same defm prefixes as the xop3op instances, with ri / mi suffixes and different opcodes.
+ defm VPROTW : xop3opimm<0xC1, "vprotw">;
+ defm VPROTQ : xop3opimm<0xC3, "vprotq">;
+ defm VPROTD : xop3opimm<0xC2, "vprotd">;
+ defm VPROTB : xop3opimm<0xC0, "vprotb">;
+}
+
+// Instruction where second source can be memory, but third must be register
+multiclass xop4opm2<bits<8> opc, string OpcodeStr> { // Four-operand XOP forms (VR128 destination, three sources); only $src2 has a memory variant.
+ def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst), // all three sources in registers
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V, VEX_I8IMM;
+ def rm : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst), // $src2 in memory, $src3 still a register
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V, VEX_I8IMM;
+}
+
+let isAsmParserOnly = 1 in { // Instantiates xop4opm2 (rr and rm forms) for the vpmadcs* and vpmacs* mnemonics.
+ defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd">;
+ defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd">;
+ defm VPMACSWW : xop4opm2<0x95, "vpmacsww">;
+ defm VPMACSWD : xop4opm2<0x96, "vpmacswd">;
+ defm VPMACSSWW : xop4opm2<0x85, "vpmacssww">;
+ defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd">;
+ defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql">;
+ defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh">;
+ defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd">;
+ defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql">;
+ defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh">;
+ defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd">;
+}
+
+// Instruction where second source can be memory, third must be imm8
+multiclass xop4opimm<bits<8> opc, string OpcodeStr> { // XOP forms with two vector sources plus an 8-bit immediate ($src3); $src2 has a memory variant.
+ def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst), // both vector sources in registers
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V;
+ def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst), // $src2 in memory
+ (ins VR128:$src1, f128mem:$src2, i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V;
+}
+
+let isAsmParserOnly = 1 in { // Instantiates xop4opimm (ri and mi forms) for the vpcom* mnemonics.
+ defm VPCOMW : xop4opimm<0xCD, "vpcomw">;
+ defm VPCOMUW : xop4opimm<0xED, "vpcomuw">;
+ defm VPCOMUQ : xop4opimm<0xEF, "vpcomuq">;
+ defm VPCOMUD : xop4opimm<0xEE, "vpcomud">;
+ defm VPCOMUB : xop4opimm<0xEC, "vpcomub">;
+ defm VPCOMQ : xop4opimm<0xCF, "vpcomq">;
+ defm VPCOMD : xop4opimm<0xCE, "vpcomd">;
+ defm VPCOMB : xop4opimm<0xCC, "vpcomb">;
+}
+
+// Instruction where either second or third source can be memory
+multiclass xop4op<bits<8> opc, string OpcodeStr> { // Four-operand XOP forms (VR128 destination, three sources) with a memory variant for $src3 (rm) and for $src2 (mr).
+ def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst), // all three sources in registers
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V, VEX_I8IMM;
+ def rm : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst), // $src3 in memory
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V, VEX_I8IMM, XOP_W; // XOP_W is the only attribute that differs from the mr form
+ def mr : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst), // $src2 in memory
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V, VEX_I8IMM;
+}
+
+let isAsmParserOnly = 1 in { // Instantiates xop4op (rr, rm, mr forms) for vpperm and vpcmov.
+ defm VPPERM : xop4op<0xA3, "vpperm">;
+ defm VPCMOV : xop4op<0xA2, "vpcmov">;
+}
+
+multiclass xop4op256<bits<8> opc, string OpcodeStr> { // 256-bit (VR256) counterpart of xop4op: memory variant for $src3 (rmY) and for $src2 (mrY).
+ def rrY : IXOPi8<opc, MRMSrcReg, (outs VR256:$dst), // all three sources in registers
+ (ins VR256:$src1, VR256:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V, VEX_I8IMM, VEX_L; // VEX_L marks the 256-bit form; without it the attributes equal those of the 128-bit xop4op rr form at the same opcode
+ def rmY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst), // $src3 in memory
+ (ins VR256:$src1, VR256:$src2, f256mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V, VEX_I8IMM, XOP_W, VEX_L;
+ def mrY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst), // $src2 in memory
+ (ins VR256:$src1, f256mem:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V, VEX_I8IMM, VEX_L;
+}
+
+let isAsmParserOnly = 1 in { // 256-bit vpcmov: same defm prefix and opcode (0xA2) as the 128-bit instance, with rrY / rmY / mrY suffixes.
+ defm VPCMOV : xop4op256<0xA2, "vpcmov">;
+}
+
+multiclass xop5op<bits<8> opc, string OpcodeStr> { // Five-operand XOP forms (three vector sources + imm8 $src4) in 128-bit (rr/rm/mr) and 256-bit (rrY/rmY/mrY) variants.
+ def rr : IXOP5<opc, MRMSrcReg, (outs VR128:$dst), // 128-bit, all vector sources in registers
+ (ins VR128:$src1, VR128:$src2, VR128:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ []>;
+ def rm : IXOP5<opc, MRMSrcMem, (outs VR128:$dst), // 128-bit, $src3 in memory
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ []>, XOP_W;
+ def mr : IXOP5<opc, MRMSrcMem, (outs VR128:$dst), // 128-bit, $src2 in memory
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ []>;
+ def rrY : IXOP5<opc, MRMSrcReg, (outs VR256:$dst), // 256-bit, all vector sources in registers
+ (ins VR256:$src1, VR256:$src2, VR256:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ []>, VEX_L; // VEX_L marks the 256-bit form; without it rrY has exactly the same attributes as rr
+ def rmY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst), // 256-bit, $src3 in memory
+ (ins VR256:$src1, VR256:$src2, f256mem:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ []>, XOP_W, VEX_L;
+ def mrY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst), // 256-bit, $src2 in memory
+ (ins VR256:$src1, f256mem:$src2, VR256:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ []>, VEX_L;
+}
+
+let isAsmParserOnly = 1 in { // Instantiates xop5op (128-bit and 256-bit forms) for vpermil2pd and vpermil2ps.
+ defm VPERMIL2PD : xop5op<0x49, "vpermil2pd">;
+ defm VPERMIL2PS : xop5op<0x48, "vpermil2ps">;
+}
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index 3f88fa6..2145a33 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -424,7 +424,9 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
TargetJITInfo::LazyResolverFn
X86JITInfo::getLazyResolverFunction(JITCompilerFn F) {
+ TsanIgnoreWritesBegin();
JITCompilerFunction = F;
+ TsanIgnoreWritesEnd();
#if defined (X86_32_JIT) && !defined (_MSC_VER)
if (Subtarget->hasSSE1())
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 81ee665..9232196 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -368,10 +368,6 @@ ReSimplify:
case X86::SETB_C64r: LowerUnaryToTwoAddr(OutMI, X86::SBB64rr); break;
case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break;
case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break;
- case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
- case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
- case X86::VFsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
- case X86::VFsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break;
case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break;
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index c1ac9f3..4e80432 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -452,7 +452,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return (RealignStack &&
+ return (MF.getTarget().Options.RealignStack &&
!MFI->hasVarSizedObjects());
}
@@ -583,7 +583,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// sure we restore the stack pointer immediately after the call, there may
// be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
MachineBasicBlock::iterator B = MBB.begin();
- while (I != B && !llvm::prior(I)->getDesc().isCall())
+ while (I != B && !llvm::prior(I)->isCall())
--I;
MBB.insert(I, New);
}
@@ -665,7 +665,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
case MVT::i8:
if (High) {
switch (Reg) {
- default: return 0;
+ default: return getX86SubSuperRegister(Reg, MVT::i64, High);
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AH;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -785,6 +785,22 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
return X86::R15D;
}
case MVT::i64:
+ // For 64-bit mode if we've requested a "high" register and the
+ // Q or r constraints we want one of these high registers or
+ // just the register name otherwise.
+ if (High) {
+ switch (Reg) {
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SP;
+ // Fallthrough.
+ }
+ }
switch (Reg) {
default: return Reg;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index e7bcbf8..6e092c7 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -273,6 +273,8 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
if (IsAMD && ((ECX >> 16) & 0x1)) {
HasFMA4 = true;
ToggleFeature(X86::FeatureFMA4);
+ HasXOP = true;
+ ToggleFeature(X86::FeatureXOP);
}
}
}
@@ -317,6 +319,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
, HasCLMUL(false)
, HasFMA3(false)
, HasFMA4(false)
+ , HasXOP(false)
, HasMOVBE(false)
, HasRDRAND(false)
, HasF16C(false)
@@ -387,9 +390,6 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
assert((!In64BitMode || HasX86_64) &&
"64-bit code requested on a subtarget that doesn't support it!");
- if(EnableSegmentedStacks && !isTargetELF())
- report_fatal_error("Segmented stacks are only implemented on ELF.");
-
// Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both
// 32 and 64 bit) and for all 64-bit targets.
if (StackAlignOverride)
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index e93f8e9..ccb9be0 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -93,6 +93,9 @@ protected:
/// HasFMA4 - Target has 4-operand fused multiply-add
bool HasFMA4;
+ /// HasXOP - Target has XOP instructions
+ bool HasXOP;
+
/// HasMOVBE - True if the processor has the MOVBE instruction.
bool HasMOVBE;
@@ -198,6 +201,7 @@ public:
bool hasCLMUL() const { return HasCLMUL; }
bool hasFMA3() const { return HasFMA3; }
bool hasFMA4() const { return HasFMA4; }
+ bool hasXOP() const { return HasXOP; }
bool hasMOVBE() const { return HasMOVBE; }
bool hasRDRAND() const { return HasRDRAND; }
bool hasF16C() const { return HasF16C; }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 1c9f3bd..126042e 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -31,9 +31,10 @@ extern "C" void LLVMInitializeX86Target() {
X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : X86TargetMachine(T, TT, CPU, FS, RM, CM, OL, false),
+ : X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false),
DataLayout(getSubtargetImpl()->isTargetDarwin() ?
"e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-"
"n8:16:32-S128" :
@@ -52,9 +53,10 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : X86TargetMachine(T, TT, CPU, FS, RM, CM, OL, true),
+ : X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true),
DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
"n8:16:32:64-S128"),
InstrInfo(*this),
@@ -67,11 +69,12 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
///
X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
- Subtarget(TT, CPU, FS, StackAlignmentOverride, is64Bit),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, Options.StackAlignmentOverride, is64Bit),
FrameLowering(*this, Subtarget),
ELFWriterInfo(is64Bit, true) {
// Determine the PICStyle based on the target selected.
@@ -95,8 +98,11 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
}
// default to hard float ABI
- if (FloatABIType == FloatABI::Default)
- FloatABIType = FloatABI::Hard;
+ if (Options.FloatABIType == FloatABI::Default)
+ this->Options.FloatABIType = FloatABI::Hard;
+
+ if (Options.EnableSegmentedStacks && !Subtarget.isTargetELF())
+ report_fatal_error("Segmented stacks are only implemented on ELF.");
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 64be458..3ac1769 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -38,7 +38,7 @@ class X86TargetMachine : public LLVMTargetMachine {
public:
X86TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool is64Bit);
@@ -85,7 +85,7 @@ class X86_32TargetMachine : public X86TargetMachine {
X86JITInfo JITInfo;
public:
X86_32TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
virtual const TargetData *getTargetData() const { return &DataLayout; }
@@ -113,7 +113,7 @@ class X86_64TargetMachine : public X86TargetMachine {
X86JITInfo JITInfo;
public:
X86_64TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
virtual const TargetData *getTargetData() const { return &DataLayout; }
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index 9bb54a8..f8c30eb 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -220,7 +220,7 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
MachineInstr *MI = I;
DebugLoc dl = I->getDebugLoc();
- bool isControlFlow = MI->getDesc().isCall() || MI->getDesc().isReturn();
+ bool isControlFlow = MI->isCall() || MI->isReturn();
// Shortcut: don't need to check regular instructions in dirty state.
if (!isControlFlow && CurState == ST_DIRTY)
diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
index d91da8c..de4abfc 100644
--- a/lib/Target/XCore/CMakeLists.txt
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -21,17 +21,5 @@ add_llvm_target(XCoreCodeGen
XCoreSelectionDAGInfo.cpp
)
-add_llvm_library_dependencies(LLVMXCoreCodeGen
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- LLVMXCoreDesc
- LLVMXCoreInfo
- )
-
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/XCore/LLVMBuild.txt b/lib/Target/XCore/LLVMBuild.txt
index 1f7e2d5..53b4a9e 100644
--- a/lib/Target/XCore/LLVMBuild.txt
+++ b/lib/Target/XCore/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = XCore
@@ -27,4 +30,3 @@ name = XCoreCodeGen
parent = XCore
required_libraries = AsmPrinter CodeGen Core MC SelectionDAG Support Target XCoreDesc XCoreInfo
add_to_library_groups = XCore
-
diff --git a/lib/Target/XCore/MCTargetDesc/CMakeLists.txt b/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
index 269822d..3a3f5b4 100644
--- a/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
@@ -3,11 +3,6 @@ add_llvm_library(LLVMXCoreDesc
XCoreMCAsmInfo.cpp
)
-add_llvm_library_dependencies(LLVMXCoreDesc
- LLVMMC
- LLVMXCoreInfo
- )
-
add_dependencies(LLVMXCoreDesc XCoreCommonTableGen)
# Hack: we need to include 'main' target directory to grab private headers
diff --git a/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
index 628afb5..a80c939 100644
--- a/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = XCoreDesc
parent = XCore
required_libraries = MC XCoreInfo
add_to_library_groups = XCore
-
diff --git a/lib/Target/XCore/TargetInfo/CMakeLists.txt b/lib/Target/XCore/TargetInfo/CMakeLists.txt
index 7f84f69..2c34b87 100644
--- a/lib/Target/XCore/TargetInfo/CMakeLists.txt
+++ b/lib/Target/XCore/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMXCoreInfo
XCoreTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMXCoreInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMXCoreInfo XCoreCommonTableGen)
diff --git a/lib/Target/XCore/TargetInfo/LLVMBuild.txt b/lib/Target/XCore/TargetInfo/LLVMBuild.txt
index d0b8e54..770ba87 100644
--- a/lib/Target/XCore/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/XCore/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = XCoreInfo
parent = XCore
required_libraries = MC Support Target
add_to_library_groups = XCore
-
diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h
index b8fb0ca..08f091e 100644
--- a/lib/Target/XCore/XCore.h
+++ b/lib/Target/XCore/XCore.h
@@ -24,7 +24,8 @@ namespace llvm {
class XCoreTargetMachine;
class formatted_raw_ostream;
- FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM);
+ FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
} // end namespace llvm;
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index 7f8b169..5007d04 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -84,7 +84,8 @@ XCoreFrameLowering::XCoreFrameLowering(const XCoreSubtarget &sti)
}
bool XCoreFrameLowering::hasFP(const MachineFunction &MF) const {
- return DisableFramePointerElim(MF) || MF.getFrameInfo()->hasVarSizedObjects();
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MF.getFrameInfo()->hasVarSizedObjects();
}
void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 8d746ae..7564fba 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -41,8 +41,8 @@ namespace {
const XCoreSubtarget &Subtarget;
public:
- XCoreDAGToDAGISel(XCoreTargetMachine &TM)
- : SelectionDAGISel(TM),
+ XCoreDAGToDAGISel(XCoreTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel),
Lowering(*TM.getTargetLowering()),
Subtarget(*TM.getSubtargetImpl()) { }
@@ -83,8 +83,9 @@ namespace {
/// createXCoreISelDag - This pass converts a legalized DAG into a
/// XCore-specific DAG, ready for instruction scheduling.
///
-FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM) {
- return new XCoreDAGToDAGISel(TM);
+FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new XCoreDAGToDAGISel(TM, OptLevel);
}
bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base,
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index d791daa..c5c668e 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -109,6 +109,8 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::ROTL , MVT::i32, Expand);
setOperationAction(ISD::ROTR , MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index eec3674..7e1e035 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -21,9 +21,10 @@ using namespace llvm;
///
XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS),
DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-"
"i16:16:32-i32:32:32-i64:32:32-n32"),
@@ -34,7 +35,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
}
bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM) {
- PM.add(createXCoreISelDag(*this));
+ PM.add(createXCoreISelDag(*this, getOptLevel()));
return false;
}
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 3f2644d..0159b1e 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -33,7 +33,7 @@ class XCoreTargetMachine : public LLVMTargetMachine {
XCoreSelectionDAGInfo TSInfo;
public:
XCoreTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);