Diffstat (limited to 'lib')
-rw-r--r--lib/Analysis/CMakeLists.txt6
-rw-r--r--lib/Analysis/ConstantFolding.cpp133
-rw-r--r--lib/Analysis/DIBuilder.cpp2
-rw-r--r--lib/Analysis/IPA/CMakeLists.txt6
-rw-r--r--lib/Analysis/IPA/LLVMBuild.txt1
-rw-r--r--lib/Analysis/InstructionSimplify.cpp540
-rw-r--r--lib/Analysis/LLVMBuild.txt4
-rw-r--r--lib/Analysis/LazyValueInfo.cpp47
-rw-r--r--lib/Analysis/Lint.cpp9
-rw-r--r--lib/Analysis/LoopInfo.cpp96
-rw-r--r--lib/Analysis/MemDepPrinter.cpp2
-rw-r--r--lib/Analysis/PHITransAddr.cpp9
-rw-r--r--lib/Analysis/ProfileVerifierPass.cpp2
-rw-r--r--lib/Analysis/ScalarEvolution.cpp64
-rw-r--r--lib/Analysis/ScalarEvolutionExpander.cpp115
-rw-r--r--lib/Analysis/ValueTracking.cpp91
-rw-r--r--lib/Archive/ArchiveWriter.cpp6
-rw-r--r--lib/Archive/CMakeLists.txt6
-rw-r--r--lib/Archive/LLVMBuild.txt1
-rw-r--r--lib/AsmParser/CMakeLists.txt5
-rw-r--r--lib/AsmParser/LLParser.cpp74
-rw-r--r--lib/AsmParser/LLParser.h4
-rw-r--r--lib/AsmParser/LLVMBuild.txt1
-rw-r--r--lib/Bitcode/LLVMBuild.txt4
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.cpp331
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.h5
-rw-r--r--lib/Bitcode/Reader/CMakeLists.txt5
-rw-r--r--lib/Bitcode/Reader/LLVMBuild.txt1
-rw-r--r--lib/Bitcode/Writer/BitcodeWriter.cpp133
-rw-r--r--lib/Bitcode/Writer/CMakeLists.txt5
-rw-r--r--lib/Bitcode/Writer/LLVMBuild.txt1
-rw-r--r--lib/Bitcode/Writer/ValueEnumerator.cpp40
-rw-r--r--lib/Bitcode/Writer/ValueEnumerator.h4
-rw-r--r--lib/CodeGen/AggressiveAntiDepBreaker.cpp8
-rw-r--r--lib/CodeGen/Analysis.cpp67
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp32
-rw-r--r--lib/CodeGen/AsmPrinter/CMakeLists.txt10
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp16
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.cpp6
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.cpp4
-rw-r--r--lib/CodeGen/AsmPrinter/LLVMBuild.txt1
-rw-r--r--lib/CodeGen/BranchFolding.cpp20
-rw-r--r--lib/CodeGen/CMakeLists.txt15
-rw-r--r--lib/CodeGen/CodeGen.cpp1
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.cpp8
-rw-r--r--lib/CodeGen/DFAPacketizer.cpp98
-rw-r--r--lib/CodeGen/DeadMachineInstructionElim.cpp2
-rw-r--r--lib/CodeGen/ExecutionDepsFix.cpp2
-rw-r--r--lib/CodeGen/ExpandISelPseudos.cpp3
-rw-r--r--lib/CodeGen/ExpandPostRAPseudos.cpp2
-rw-r--r--lib/CodeGen/GCStrategy.cpp2
-rw-r--r--lib/CodeGen/IfConversion.cpp12
-rw-r--r--lib/CodeGen/InlineSpiller.cpp8
-rw-r--r--lib/CodeGen/LLVMBuild.txt4
-rw-r--r--lib/CodeGen/LLVMTargetMachine.cpp20
-rw-r--r--lib/CodeGen/LiveDebugVariables.cpp4
-rw-r--r--lib/CodeGen/LiveIntervalAnalysis.cpp2
-rw-r--r--lib/CodeGen/LiveRangeEdit.cpp22
-rw-r--r--lib/CodeGen/LiveRangeEdit.h7
-rw-r--r--lib/CodeGen/LiveVariables.cpp6
-rw-r--r--lib/CodeGen/MachineBasicBlock.cpp147
-rw-r--r--lib/CodeGen/MachineBlockPlacement.cpp156
-rw-r--r--lib/CodeGen/MachineCSE.cpp9
-rw-r--r--lib/CodeGen/MachineInstr.cpp116
-rw-r--r--lib/CodeGen/MachineInstrBundle.cpp180
-rw-r--r--lib/CodeGen/MachineLICM.cpp13
-rw-r--r--lib/CodeGen/MachineSSAUpdater.cpp6
-rw-r--r--lib/CodeGen/MachineSink.cpp164
-rw-r--r--lib/CodeGen/MachineVerifier.cpp36
-rw-r--r--lib/CodeGen/PHIElimination.cpp2
-rw-r--r--lib/CodeGen/PeepholeOptimizer.cpp10
-rw-r--r--lib/CodeGen/PostRASchedulerList.cpp7
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp10
-rw-r--r--lib/CodeGen/RegAllocFast.cpp6
-rw-r--r--lib/CodeGen/RegAllocPBQP.cpp16
-rw-r--r--lib/CodeGen/RegisterCoalescer.cpp10
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp58
-rw-r--r--lib/CodeGen/SelectionDAG/CMakeLists.txt10
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp135
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp33
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/LLVMBuild.txt1
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp20
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp32
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp4
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp4
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp92
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp76
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp137
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h5
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp177
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp40
-rw-r--r--lib/CodeGen/ShrinkWrapping.cpp2
-rw-r--r--lib/CodeGen/SjLjEHPrepare.cpp41
-rw-r--r--lib/CodeGen/SplitKit.cpp2
-rw-r--r--lib/CodeGen/Splitter.cpp827
-rw-r--r--lib/CodeGen/Splitter.h101
-rw-r--r--lib/CodeGen/TailDuplication.cpp13
-rw-r--r--lib/CodeGen/TargetFrameLoweringImpl.cpp (renamed from lib/Target/TargetFrameLowering.cpp)2
-rw-r--r--lib/CodeGen/TargetInstrInfoImpl.cpp68
-rw-r--r--lib/CodeGen/TargetLoweringObjectFileImpl.cpp4
-rw-r--r--lib/CodeGen/TargetOptionsImpl.cpp52
-rw-r--r--lib/CodeGen/TwoAddressInstructionPass.cpp34
-rw-r--r--lib/DebugInfo/CMakeLists.txt4
-rw-r--r--lib/DebugInfo/LLVMBuild.txt1
-rw-r--r--lib/ExecutionEngine/CMakeLists.txt7
-rw-r--r--lib/ExecutionEngine/ExecutionEngine.cpp28
-rw-r--r--lib/ExecutionEngine/Interpreter/CMakeLists.txt8
-rw-r--r--lib/ExecutionEngine/Interpreter/LLVMBuild.txt1
-rw-r--r--lib/ExecutionEngine/JIT/CMakeLists.txt10
-rw-r--r--lib/ExecutionEngine/JIT/JIT.cpp11
-rw-r--r--lib/ExecutionEngine/JIT/JIT.h4
-rw-r--r--lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp2
-rw-r--r--lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp2
-rw-r--r--lib/ExecutionEngine/JIT/JITEmitter.cpp14
-rw-r--r--lib/ExecutionEngine/JIT/LLVMBuild.txt1
-rw-r--r--lib/ExecutionEngine/LLVMBuild.txt4
-rw-r--r--lib/ExecutionEngine/MCJIT/CMakeLists.txt8
-rw-r--r--lib/ExecutionEngine/MCJIT/LLVMBuild.txt1
-rw-r--r--lib/ExecutionEngine/MCJIT/MCJIT.cpp7
-rw-r--r--lib/ExecutionEngine/MCJIT/MCJIT.h4
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt5
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt1
-rw-r--r--lib/ExecutionEngine/TargetSelect.cpp27
-rw-r--r--lib/LLVMBuild.txt4
-rw-r--r--lib/Linker/CMakeLists.txt8
-rw-r--r--lib/Linker/LLVMBuild.txt1
-rw-r--r--lib/MC/CMakeLists.txt5
-rw-r--r--lib/MC/ELFObjectWriter.cpp49
-rw-r--r--lib/MC/ELFObjectWriter.h6
-rw-r--r--lib/MC/LLVMBuild.txt4
-rw-r--r--lib/MC/MCAsmInfo.cpp1
-rw-r--r--lib/MC/MCAsmInfoCOFF.cpp8
-rw-r--r--lib/MC/MCAsmInfoDarwin.cpp4
-rw-r--r--lib/MC/MCAsmStreamer.cpp4
-rw-r--r--lib/MC/MCAssembler.cpp71
-rw-r--r--lib/MC/MCDisassembler/CMakeLists.txt7
-rw-r--r--lib/MC/MCDisassembler/EDMain.cpp280
-rw-r--r--lib/MC/MCDwarf.cpp344
-rw-r--r--lib/MC/MCELF.cpp4
-rw-r--r--lib/MC/MCELFStreamer.cpp5
-rw-r--r--lib/MC/MCMachOStreamer.cpp2
-rw-r--r--lib/MC/MCObjectFileInfo.cpp6
-rw-r--r--lib/MC/MCObjectStreamer.cpp4
-rw-r--r--lib/MC/MCParser/AsmParser.cpp55
-rw-r--r--lib/MC/MCParser/CMakeLists.txt5
-rw-r--r--lib/MC/MCParser/ELFAsmParser.cpp1
-rw-r--r--lib/MC/MCParser/LLVMBuild.txt1
-rw-r--r--lib/MC/MachObjectWriter.cpp11
-rw-r--r--lib/MC/WinCOFFObjectWriter.cpp2
-rw-r--r--lib/Object/CMakeLists.txt5
-rw-r--r--lib/Object/COFFObjectFile.cpp19
-rw-r--r--lib/Object/ELFObjectFile.cpp49
-rw-r--r--lib/Object/LLVMBuild.txt1
-rw-r--r--lib/Object/MachOObjectFile.cpp97
-rw-r--r--lib/Object/Object.cpp11
-rw-r--r--lib/Support/APFloat.cpp86
-rw-r--r--lib/Support/APInt.cpp14
-rw-r--r--lib/Support/CommandLine.cpp3
-rw-r--r--lib/Support/DAGDeltaAlgorithm.cpp3
-rw-r--r--lib/Support/Host.cpp2
-rw-r--r--lib/Support/LLVMBuild.txt1
-rw-r--r--lib/Support/Mutex.cpp2
-rw-r--r--lib/Support/Path.cpp26
-rw-r--r--lib/Support/PathV2.cpp126
-rw-r--r--lib/Support/Program.cpp1
-rw-r--r--lib/Support/RWMutex.cpp2
-rw-r--r--lib/Support/Statistic.cpp3
-rw-r--r--lib/Support/ThreadLocal.cpp2
-rw-r--r--lib/Support/Threading.cpp8
-rw-r--r--lib/Support/Triple.cpp7
-rw-r--r--lib/Support/Unix/PathV2.inc41
-rw-r--r--lib/Support/Unix/Program.inc12
-rw-r--r--lib/Support/Valgrind.cpp2
-rw-r--r--lib/Support/Windows/PathV2.inc128
-rw-r--r--lib/Support/Windows/Program.inc36
-rw-r--r--lib/Support/Windows/Signals.inc2
-rw-r--r--lib/Support/Windows/Windows.h106
-rw-r--r--lib/Support/raw_ostream.cpp1
-rw-r--r--lib/TableGen/CMakeLists.txt4
-rw-r--r--lib/TableGen/LLVMBuild.txt1
-rw-r--r--lib/TableGen/TGParser.cpp2
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp30
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp244
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h11
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp4
-rw-r--r--lib/Target/ARM/ARMCodeEmitter.cpp2
-rw-r--r--lib/Target/ARM/ARMConstantIslandPass.cpp1001
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp148
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp94
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp6
-rw-r--r--lib/Target/ARM/ARMHazardRecognizer.cpp28
-rw-r--r--lib/Target/ARM/ARMHazardRecognizer.h8
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp54
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp64
-rw-r--r--lib/Target/ARM/ARMInstrFormats.td192
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td147
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td1379
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td8
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td73
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td62
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp13
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp24
-rw-r--r--lib/Target/ARM/ARMTargetMachine.h3
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.cpp1
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp1143
-rw-r--r--lib/Target/ARM/AsmParser/CMakeLists.txt8
-rw-r--r--lib/Target/ARM/AsmParser/LLVMBuild.txt1
-rw-r--r--lib/Target/ARM/CMakeLists.txt14
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassembler.cpp171
-rw-r--r--lib/Target/ARM/Disassembler/CMakeLists.txt8
-rw-r--r--lib/Target/ARM/Disassembler/LLVMBuild.txt1
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp26
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.h6
-rw-r--r--lib/Target/ARM/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/ARM/InstPrinter/LLVMBuild.txt1
-rw-r--r--lib/Target/ARM/LLVMBuild.txt4
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp46
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp9
-rw-r--r--lib/Target/ARM/MCTargetDesc/CMakeLists.txt7
-rw-r--r--lib/Target/ARM/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/ARM/MLxExpansionPass.cpp8
-rw-r--r--lib/Target/ARM/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/ARM/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.cpp7
-rw-r--r--lib/Target/ARM/Thumb2ITBlockPass.cpp8
-rw-r--r--lib/Target/ARM/Thumb2SizeReduction.cpp46
-rw-r--r--lib/Target/CBackend/CMakeLists.txt12
-rw-r--r--lib/Target/CBackend/CTargetMachine.h4
-rw-r--r--lib/Target/CBackend/LLVMBuild.txt4
-rw-r--r--lib/Target/CBackend/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/CBackend/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/CMakeLists.txt7
-rw-r--r--lib/Target/CellSPU/CMakeLists.txt12
-rw-r--r--lib/Target/CellSPU/LLVMBuild.txt4
-rw-r--r--lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt5
-rw-r--r--lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/CellSPU/SPUFrameLowering.cpp3
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.cpp10
-rw-r--r--lib/Target/CellSPU/SPUTargetMachine.cpp3
-rw-r--r--lib/Target/CellSPU/SPUTargetMachine.h2
-rw-r--r--lib/Target/CellSPU/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/CellSPU/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/CppBackend/CMakeLists.txt7
-rw-r--r--lib/Target/CppBackend/CPPTargetMachine.h4
-rw-r--r--lib/Target/CppBackend/LLVMBuild.txt4
-rw-r--r--lib/Target/CppBackend/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/CppBackend/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/Hexagon/CMakeLists.txt35
-rw-r--r--lib/Target/Hexagon/Hexagon.h54
-rw-r--r--lib/Target/Hexagon/Hexagon.td66
-rw-r--r--lib/Target/Hexagon/HexagonAsmPrinter.cpp555
-rw-r--r--lib/Target/Hexagon/HexagonCFGOptimizer.cpp240
-rw-r--r--lib/Target/Hexagon/HexagonCallingConv.td35
-rw-r--r--lib/Target/Hexagon/HexagonCallingConvLower.cpp207
-rw-r--r--lib/Target/Hexagon/HexagonCallingConvLower.h189
-rw-r--r--lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp184
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.cpp333
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.h50
-rw-r--r--lib/Target/Hexagon/HexagonHardwareLoops.cpp644
-rw-r--r--lib/Target/Hexagon/HexagonISelDAGToDAG.cpp1495
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.cpp1505
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.h162
-rw-r--r--lib/Target/Hexagon/HexagonImmediates.td491
-rw-r--r--lib/Target/Hexagon/HexagonInstrFormats.td242
-rw-r--r--lib/Target/Hexagon/HexagonInstrFormatsV4.td46
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.cpp1459
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.h166
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.td3014
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfoV3.td134
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfoV4.td3392
-rw-r--r--lib/Target/Hexagon/HexagonIntrinsics.td3462
-rw-r--r--lib/Target/Hexagon/HexagonIntrinsicsDerived.td29
-rw-r--r--lib/Target/Hexagon/HexagonIntrinsicsV3.td50
-rw-r--r--lib/Target/Hexagon/HexagonIntrinsicsV4.td369
-rw-r--r--lib/Target/Hexagon/HexagonMachineFunctionInfo.h75
-rw-r--r--lib/Target/Hexagon/HexagonOptimizeSZExtends.cpp129
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.cpp323
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.h89
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.td169
-rw-r--r--lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp85
-rw-r--r--lib/Target/Hexagon/HexagonSchedule.td53
-rw-r--r--lib/Target/Hexagon/HexagonScheduleV4.td56
-rw-r--r--lib/Target/Hexagon/HexagonSelectCCInfo.td121
-rw-r--r--lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp46
-rw-r--r--lib/Target/Hexagon/HexagonSelectionDAGInfo.h40
-rw-r--r--lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp136
-rw-r--r--lib/Target/Hexagon/HexagonSubtarget.cpp59
-rw-r--r--lib/Target/Hexagon/HexagonSubtarget.h74
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.cpp118
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.h86
-rw-r--r--lib/Target/Hexagon/HexagonTargetObjectFile.cpp94
-rw-r--r--lib/Target/Hexagon/HexagonTargetObjectFile.h40
-rw-r--r--lib/Target/Hexagon/HexagonVarargsCallingConvention.h141
-rw-r--r--lib/Target/Hexagon/LLVMBuild.txt32
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt6
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp36
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h30
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp94
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h40
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt23
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/Makefile16
-rw-r--r--lib/Target/Hexagon/Makefile23
-rw-r--r--lib/Target/Hexagon/TargetInfo/CMakeLists.txt8
-rw-r--r--lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp19
-rw-r--r--lib/Target/Hexagon/TargetInfo/LLVMBuild.txt23
-rw-r--r--lib/Target/Hexagon/TargetInfo/Makefile15
-rw-r--r--lib/Target/LLVMBuild.txt3
-rw-r--r--lib/Target/MBlaze/AsmParser/CMakeLists.txt7
-rw-r--r--lib/Target/MBlaze/AsmParser/LLVMBuild.txt1
-rw-r--r--lib/Target/MBlaze/CMakeLists.txt13
-rw-r--r--lib/Target/MBlaze/Disassembler/CMakeLists.txt7
-rw-r--r--lib/Target/MBlaze/Disassembler/LLVMBuild.txt1
-rw-r--r--lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp18
-rw-r--r--lib/Target/MBlaze/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/MBlaze/InstPrinter/LLVMBuild.txt1
-rw-r--r--lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h2
-rw-r--r--lib/Target/MBlaze/LLVMBuild.txt4
-rw-r--r--lib/Target/MBlaze/MBlazeAsmPrinter.cpp4
-rw-r--r--lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp22
-rw-r--r--lib/Target/MBlaze/MBlazeFrameLowering.cpp17
-rw-r--r--lib/Target/MBlaze/MBlazeISelLowering.cpp2
-rw-r--r--lib/Target/MBlaze/MBlazeInstrFormats.td24
-rw-r--r--lib/Target/MBlaze/MBlazeInstrInfo.td169
-rw-r--r--lib/Target/MBlaze/MBlazeMCInstLower.cpp2
-rw-r--r--lib/Target/MBlaze/MBlazeTargetMachine.cpp18
-rw-r--r--lib/Target/MBlaze/MBlazeTargetMachine.h1
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt7
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp17
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h1
-rw-r--r--lib/Target/MBlaze/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/MBlaze/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/MSP430/CMakeLists.txt13
-rw-r--r--lib/Target/MSP430/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/MSP430/InstPrinter/LLVMBuild.txt1
-rw-r--r--lib/Target/MSP430/LLVMBuild.txt4
-rw-r--r--lib/Target/MSP430/MCTargetDesc/CMakeLists.txt8
-rw-r--r--lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/MSP430/MSP430FrameLowering.cpp4
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.cpp4
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.cpp9
-rw-r--r--lib/Target/MSP430/MSP430TargetMachine.cpp3
-rw-r--r--lib/Target/MSP430/MSP430TargetMachine.h2
-rw-r--r--lib/Target/MSP430/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/MSP430/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/Mips/CMakeLists.txt13
-rw-r--r--lib/Target/Mips/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/Mips/InstPrinter/LLVMBuild.txt1
-rw-r--r--lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp4
-rw-r--r--lib/Target/Mips/LLVMBuild.txt4
-rw-r--r--lib/Target/Mips/MCTargetDesc/CMakeLists.txt7
-rw-r--r--lib/Target/Mips/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp85
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h11
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h102
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp15
-rw-r--r--lib/Target/Mips/Mips.td6
-rw-r--r--lib/Target/Mips/Mips64InstrInfo.td48
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.cpp20
-rw-r--r--lib/Target/Mips/MipsCodeEmitter.cpp4
-rw-r--r--lib/Target/Mips/MipsDelaySlotFiller.cpp17
-rw-r--r--lib/Target/Mips/MipsFrameLowering.cpp4
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.cpp47
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp192
-rw-r--r--lib/Target/Mips/MipsISelLowering.h9
-rw-r--r--lib/Target/Mips/MipsInstrFormats.td2
-rw-r--r--lib/Target/Mips/MipsInstrInfo.cpp17
-rw-r--r--lib/Target/Mips/MipsInstrInfo.h1
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td161
-rw-r--r--lib/Target/Mips/MipsMCInstLower.cpp37
-rw-r--r--lib/Target/Mips/MipsMCInstLower.h2
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.cpp1
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.td3
-rw-r--r--lib/Target/Mips/MipsSubtarget.cpp2
-rw-r--r--lib/Target/Mips/MipsTargetMachine.cpp53
-rw-r--r--lib/Target/Mips/MipsTargetMachine.h8
-rw-r--r--lib/Target/Mips/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/Mips/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/PTX/CMakeLists.txt14
-rw-r--r--lib/Target/PTX/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/PTX/InstPrinter/LLVMBuild.txt1
-rw-r--r--lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp47
-rw-r--r--lib/Target/PTX/LLVMBuild.txt4
-rw-r--r--lib/Target/PTX/MCTargetDesc/CMakeLists.txt7
-rw-r--r--lib/Target/PTX/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h71
-rw-r--r--lib/Target/PTX/PTXAsmPrinter.cpp69
-rw-r--r--lib/Target/PTX/PTXFPRoundingModePass.cpp6
-rw-r--r--lib/Target/PTX/PTXISelLowering.cpp35
-rw-r--r--lib/Target/PTX/PTXInstrInfo.cpp17
-rw-r--r--lib/Target/PTX/PTXInstrInfo.td12
-rw-r--r--lib/Target/PTX/PTXMFInfoExtract.cpp21
-rw-r--r--lib/Target/PTX/PTXMachineFunctionInfo.h154
-rw-r--r--lib/Target/PTX/PTXTargetMachine.cpp24
-rw-r--r--lib/Target/PTX/PTXTargetMachine.h6
-rw-r--r--lib/Target/PTX/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/PTX/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/PowerPC/CMakeLists.txt14
-rw-r--r--lib/Target/PowerPC/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/PowerPC/InstPrinter/LLVMBuild.txt1
-rw-r--r--lib/Target/PowerPC/LLVMBuild.txt4
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt7
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp10
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp5
-rw-r--r--lib/Target/PowerPC/PPCCodeEmitter.cpp6
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp18
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.cpp179
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.h13
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp6
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp39
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td18
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp120
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h5
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td13
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp130
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.h2
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp20
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h4
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp11
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.h6
-rw-r--r--lib/Target/PowerPC/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/PowerPC/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/Sparc/CMakeLists.txt12
-rw-r--r--lib/Target/Sparc/DelaySlotFiller.cpp16
-rw-r--r--lib/Target/Sparc/LLVMBuild.txt4
-rw-r--r--lib/Target/Sparc/MCTargetDesc/CMakeLists.txt6
-rw-r--r--lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/Sparc/SparcAsmPrinter.cpp4
-rw-r--r--lib/Target/Sparc/SparcISelLowering.cpp2
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.cpp4
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.cpp15
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.h6
-rw-r--r--lib/Target/Sparc/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/Sparc/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/TargetInstrInfo.cpp42
-rw-r--r--lib/Target/TargetLibraryInfo.cpp91
-rw-r--r--lib/Target/TargetLoweringObjectFile.cpp6
-rw-r--r--lib/Target/TargetMachine.cpp192
-rw-r--r--lib/Target/TargetRegisterInfo.cpp2
-rw-r--r--lib/Target/X86/AsmParser/CMakeLists.txt8
-rw-r--r--lib/Target/X86/AsmParser/LLVMBuild.txt1
-rw-r--r--lib/Target/X86/CMakeLists.txt15
-rw-r--r--lib/Target/X86/Disassembler/CMakeLists.txt6
-rw-r--r--lib/Target/X86/Disassembler/LLVMBuild.txt1
-rw-r--r--lib/Target/X86/InstPrinter/CMakeLists.txt6
-rw-r--r--lib/Target/X86/InstPrinter/LLVMBuild.txt1
-rw-r--r--lib/Target/X86/InstPrinter/X86InstComments.cpp244
-rw-r--r--lib/Target/X86/LLVMBuild.txt4
-rw-r--r--lib/Target/X86/MCTargetDesc/CMakeLists.txt7
-rw-r--r--lib/Target/X86/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp13
-rw-r--r--lib/Target/X86/MCTargetDesc/X86BaseInfo.h20
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp14
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h8
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp99
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp6
-rw-r--r--lib/Target/X86/README-SSE.txt13
-rw-r--r--lib/Target/X86/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/X86/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/X86/Utils/CMakeLists.txt5
-rw-r--r--lib/Target/X86/Utils/LLVMBuild.txt1
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.cpp110
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.h47
-rw-r--r--lib/Target/X86/X86.td18
-rw-r--r--lib/Target/X86/X86CallingConv.td13
-rw-r--r--lib/Target/X86/X86CodeEmitter.cpp2
-rw-r--r--lib/Target/X86/X86FastISel.cpp10
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp145
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp900
-rw-r--r--lib/Target/X86/X86ISelLowering.h25
-rw-r--r--lib/Target/X86/X86InstrFMA.td388
-rw-r--r--lib/Target/X86/X86InstrFormats.td37
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td30
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp37
-rw-r--r--lib/Target/X86/X86InstrInfo.td5
-rw-r--r--lib/Target/X86/X86InstrSSE.td801
-rw-r--r--lib/Target/X86/X86InstrXOP.td243
-rw-r--r--lib/Target/X86/X86JITInfo.cpp2
-rw-r--r--lib/Target/X86/X86MCInstLower.cpp4
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp22
-rw-r--r--lib/Target/X86/X86Subtarget.cpp6
-rw-r--r--lib/Target/X86/X86Subtarget.h4
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp18
-rw-r--r--lib/Target/X86/X86TargetMachine.h6
-rw-r--r--lib/Target/X86/X86VZeroUpper.cpp2
-rw-r--r--lib/Target/XCore/CMakeLists.txt12
-rw-r--r--lib/Target/XCore/LLVMBuild.txt4
-rw-r--r--lib/Target/XCore/MCTargetDesc/CMakeLists.txt5
-rw-r--r--lib/Target/XCore/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/XCore/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/XCore/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/XCore/XCore.h3
-rw-r--r--lib/Target/XCore/XCoreFrameLowering.cpp3
-rw-r--r--lib/Target/XCore/XCoreISelDAGToDAG.cpp9
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp2
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.cpp5
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.h2
-rw-r--r--lib/Transforms/IPO/CMakeLists.txt11
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp33
-rw-r--r--lib/Transforms/IPO/LLVMBuild.txt1
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp3
-rw-r--r--lib/Transforms/InstCombine/CMakeLists.txt8
-rw-r--r--lib/Transforms/InstCombine/InstCombine.h6
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp4
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp14
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp28
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp38
-rw-r--r--lib/Transforms/InstCombine/InstCombineShifts.cpp3
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp30
-rw-r--r--lib/Transforms/InstCombine/LLVMBuild.txt1
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp50
-rw-r--r--lib/Transforms/Instrumentation/CMakeLists.txt7
-rw-r--r--lib/Transforms/Instrumentation/GCOVProfiling.cpp139
-rw-r--r--lib/Transforms/Instrumentation/LLVMBuild.txt1
-rw-r--r--lib/Transforms/LLVMBuild.txt4
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt9
-rw-r--r--lib/Transforms/Scalar/CodeGenPrepare.cpp11
-rw-r--r--lib/Transforms/Scalar/ConstantProp.cpp13
-rw-r--r--lib/Transforms/Scalar/DeadStoreElimination.cpp4
-rw-r--r--lib/Transforms/Scalar/EarlyCSE.cpp7
-rw-r--r--lib/Transforms/Scalar/GVN.cpp9
-rw-r--r--lib/Transforms/Scalar/GlobalMerge.cpp2
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp44
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp16
-rw-r--r--lib/Transforms/Scalar/LICM.cpp17
-rw-r--r--lib/Transforms/Scalar/LLVMBuild.txt1
-rw-r--r--lib/Transforms/Scalar/LoopInstSimplify.cpp6
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp148
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp57
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp104
-rw-r--r--lib/Transforms/Scalar/MemCpyOptimizer.cpp4
-rw-r--r--lib/Transforms/Scalar/ObjCARC.cpp162
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp27
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp2
-rw-r--r--lib/Transforms/Scalar/SimplifyLibCalls.cpp5
-rw-r--r--lib/Transforms/Scalar/Sink.cpp3
-rw-r--r--lib/Transforms/Utils/AddrModeMatcher.cpp9
-rw-r--r--lib/Transforms/Utils/BasicBlockUtils.cpp15
-rw-r--r--lib/Transforms/Utils/BreakCriticalEdges.cpp3
-rw-r--r--lib/Transforms/Utils/CMakeLists.txt9
-rw-r--r--lib/Transforms/Utils/InlineFunction.cpp62
-rw-r--r--lib/Transforms/Utils/LLVMBuild.txt1
-rw-r--r--lib/Transforms/Utils/Local.cpp38
-rw-r--r--lib/Transforms/Utils/LoopSimplify.cpp64
-rw-r--r--lib/Transforms/Utils/LoopUnroll.cpp29
-rw-r--r--lib/Transforms/Utils/LoopUnrollRuntime.cpp374
-rw-r--r--lib/Transforms/Utils/ModuleUtils.cpp16
-rw-r--r--lib/Transforms/Utils/PromoteMemoryToRegister.cpp2
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp4
-rw-r--r--lib/Transforms/Utils/SimplifyInstructions.cpp12
-rw-r--r--lib/VMCore/AsmWriter.cpp3
-rw-r--r--lib/VMCore/AutoUpgrade.cpp587
-rw-r--r--lib/VMCore/CMakeLists.txt2
-rw-r--r--lib/VMCore/ConstantFold.cpp2
-rw-r--r--lib/VMCore/Constants.cpp16
-rw-r--r--lib/VMCore/Instruction.cpp53
-rw-r--r--lib/VMCore/Instructions.cpp36
-rw-r--r--lib/VMCore/LLVMBuild.txt1
-rw-r--r--lib/VMCore/Metadata.cpp4
-rw-r--r--lib/VMCore/Type.cpp10
-rw-r--r--lib/VMCore/User.cpp2
-rw-r--r--lib/VMCore/Value.cpp13
-rw-r--r--lib/VMCore/Verifier.cpp78
566 files changed, 33520 insertions, 8199 deletions
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index e79459d..cb1e1eb 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -58,10 +58,4 @@ add_llvm_library(LLVMAnalysis
ValueTracking.cpp
)
-add_llvm_library_dependencies(LLVMAnalysis
- LLVMCore
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(IPA)
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index df79849..8e2f263 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -26,6 +26,7 @@
#include "llvm/Operator.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/ErrorHandling.h"
@@ -542,8 +543,8 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
/// explicitly cast them so that they aren't implicitly casted by the
/// getelementptr.
static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
- Type *ResultTy,
- const TargetData *TD) {
+ Type *ResultTy, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
if (!TD) return 0;
Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext());
@@ -568,7 +569,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
Constant *C =
ConstantExpr::getGetElementPtr(Ops[0], NewIdxs);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
return C;
}
@@ -576,10 +577,11 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
/// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP
/// constant expression, do so.
static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
- Type *ResultTy,
- const TargetData *TD) {
+ Type *ResultTy, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
Constant *Ptr = Ops[0];
- if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized())
+ if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized() ||
+ !Ptr->getType()->isPointerTy())
return 0;
Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext());
@@ -602,7 +604,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
Res = ConstantExpr::getSub(Res, CE->getOperand(1));
Res = ConstantExpr::getIntToPtr(Res, ResultTy);
if (ConstantExpr *ResCE = dyn_cast<ConstantExpr>(Res))
- Res = ConstantFoldConstantExpression(ResCE, TD);
+ Res = ConstantFoldConstantExpression(ResCE, TD, TLI);
return Res;
}
}
@@ -729,7 +731,9 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
/// Note that this fails if not all of the operands are constant. Otherwise,
/// this function can only fail when attempting to fold instructions like loads
/// and stores, which have no constant expression form.
-Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
+Constant *llvm::ConstantFoldInstruction(Instruction *I,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// Handle PHI nodes quickly here...
if (PHINode *PN = dyn_cast<PHINode>(I)) {
Constant *CommonValue = 0;
@@ -765,7 +769,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
- TD);
+ TD, TLI);
if (const LoadInst *LI = dyn_cast<LoadInst>(I))
return ConstantFoldLoadInst(LI, TD);
@@ -781,28 +785,29 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
cast<Constant>(EVI->getAggregateOperand()),
EVI->getIndices());
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD);
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI);
}
/// ConstantFoldConstantExpression - Attempt to fold the constant expression
/// using the specified TargetData. If successful, the constant result is
/// returned; if not, null is returned.
Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
SmallVector<Constant*, 8> Ops;
for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end();
i != e; ++i) {
Constant *NewC = cast<Constant>(*i);
// Recursively fold the ConstantExpr's operands.
if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC))
- NewC = ConstantFoldConstantExpression(NewCE, TD);
+ NewC = ConstantFoldConstantExpression(NewCE, TD, TLI);
Ops.push_back(NewC);
}
if (CE->isCompare())
return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
- TD);
- return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD);
+ TD, TLI);
+ return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD, TLI);
}
/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
@@ -817,7 +822,8 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
///
Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
ArrayRef<Constant *> Ops,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// Handle easy binops first.
if (Instruction::isBinaryOp(Opcode)) {
if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1]))
@@ -834,7 +840,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
case Instruction::Call:
if (Function *F = dyn_cast<Function>(Ops.back()))
if (canConstantFoldCallTo(F))
- return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1));
+ return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1), TLI);
return 0;
case Instruction::PtrToInt:
// If the input is a inttoptr, eliminate the pair. This requires knowing
@@ -888,9 +894,9 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
case Instruction::ShuffleVector:
return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
case Instruction::GetElementPtr:
- if (Constant *C = CastGEPIndices(Ops, DestTy, TD))
+ if (Constant *C = CastGEPIndices(Ops, DestTy, TD, TLI))
return C;
- if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD))
+ if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD, TLI))
return C;
return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1));
@@ -903,7 +909,8 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
///
Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
Constant *Ops0, Constant *Ops1,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// fold: icmp (inttoptr x), null -> icmp x, 0
// fold: icmp (ptrtoint x), 0 -> icmp x, null
// fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
@@ -920,7 +927,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
IntPtrTy, false);
Constant *Null = Constant::getNullValue(C->getType());
- return ConstantFoldCompareInstOperands(Predicate, C, Null, TD);
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
}
// Only do this transformation if the int is intptrty in size, otherwise
@@ -929,7 +936,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
CE0->getType() == IntPtrTy) {
Constant *C = CE0->getOperand(0);
Constant *Null = Constant::getNullValue(C->getType());
- return ConstantFoldCompareInstOperands(Predicate, C, Null, TD);
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
}
}
@@ -944,7 +951,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
IntPtrTy, false);
Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0),
IntPtrTy, false);
- return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD);
+ return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD, TLI);
}
// Only do this transformation if the int is intptrty in size, otherwise
@@ -953,7 +960,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
CE0->getType() == IntPtrTy &&
CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()))
return ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0),
- CE1->getOperand(0), TD);
+ CE1->getOperand(0), TD, TLI);
}
}
@@ -962,13 +969,15 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) &&
CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) {
Constant *LHS =
- ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1,TD);
+ ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1,
+ TD, TLI);
Constant *RHS =
- ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1,TD);
+ ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1,
+ TD, TLI);
unsigned OpC =
Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
Constant *Ops[] = { LHS, RHS };
- return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, TD);
+ return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, TD, TLI);
}
}
@@ -1045,6 +1054,7 @@ bool
llvm::canConstantFoldCallTo(const Function *F) {
switch (F->getIntrinsicID()) {
case Intrinsic::sqrt:
+ case Intrinsic::pow:
case Intrinsic::powi:
case Intrinsic::bswap:
case Intrinsic::ctpop:
@@ -1168,7 +1178,8 @@ static Constant *ConstantFoldConvertToInt(ConstantFP *Op, bool roundTowardZero,
/// ConstantFoldCall - Attempt to constant fold a call to the specified function
/// with the specified arguments, returning null if unsuccessful.
Constant *
-llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
+llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
+ const TargetLibraryInfo *TLI) {
if (!F->hasName()) return 0;
StringRef Name = F->getName();
@@ -1183,6 +1194,8 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
return ConstantInt::get(F->getContext(), Val.bitcastToAPInt());
}
+ if (!TLI)
+ return 0;
if (!Ty->isFloatTy() && !Ty->isDoubleTy())
return 0;
@@ -1201,43 +1214,43 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
Op->getValueAPF().convertToDouble();
switch (Name[0]) {
case 'a':
- if (Name == "acos")
+ if (Name == "acos" && TLI->has(LibFunc::acos))
return ConstantFoldFP(acos, V, Ty);
- else if (Name == "asin")
+ else if (Name == "asin" && TLI->has(LibFunc::asin))
return ConstantFoldFP(asin, V, Ty);
- else if (Name == "atan")
+ else if (Name == "atan" && TLI->has(LibFunc::atan))
return ConstantFoldFP(atan, V, Ty);
break;
case 'c':
- if (Name == "ceil")
+ if (Name == "ceil" && TLI->has(LibFunc::ceil))
return ConstantFoldFP(ceil, V, Ty);
- else if (Name == "cos")
+ else if (Name == "cos" && TLI->has(LibFunc::cos))
return ConstantFoldFP(cos, V, Ty);
- else if (Name == "cosh")
+ else if (Name == "cosh" && TLI->has(LibFunc::cosh))
return ConstantFoldFP(cosh, V, Ty);
- else if (Name == "cosf")
+ else if (Name == "cosf" && TLI->has(LibFunc::cosf))
return ConstantFoldFP(cos, V, Ty);
break;
case 'e':
- if (Name == "exp")
+ if (Name == "exp" && TLI->has(LibFunc::exp))
return ConstantFoldFP(exp, V, Ty);
- if (Name == "exp2") {
+ if (Name == "exp2" && TLI->has(LibFunc::exp2)) {
// Constant fold exp2(x) as pow(2,x) in case the host doesn't have a
// C99 library.
return ConstantFoldBinaryFP(pow, 2.0, V, Ty);
}
break;
case 'f':
- if (Name == "fabs")
+ if (Name == "fabs" && TLI->has(LibFunc::fabs))
return ConstantFoldFP(fabs, V, Ty);
- else if (Name == "floor")
+ else if (Name == "floor" && TLI->has(LibFunc::floor))
return ConstantFoldFP(floor, V, Ty);
break;
case 'l':
- if (Name == "log" && V > 0)
+ if (Name == "log" && V > 0 && TLI->has(LibFunc::log))
return ConstantFoldFP(log, V, Ty);
- else if (Name == "log10" && V > 0)
+ else if (Name == "log10" && V > 0 && TLI->has(LibFunc::log10))
return ConstantFoldFP(log10, V, Ty);
else if (F->getIntrinsicID() == Intrinsic::sqrt &&
(Ty->isFloatTy() || Ty->isDoubleTy())) {
@@ -1248,21 +1261,21 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
}
break;
case 's':
- if (Name == "sin")
+ if (Name == "sin" && TLI->has(LibFunc::sin))
return ConstantFoldFP(sin, V, Ty);
- else if (Name == "sinh")
+ else if (Name == "sinh" && TLI->has(LibFunc::sinh))
return ConstantFoldFP(sinh, V, Ty);
- else if (Name == "sqrt" && V >= 0)
+ else if (Name == "sqrt" && V >= 0 && TLI->has(LibFunc::sqrt))
return ConstantFoldFP(sqrt, V, Ty);
- else if (Name == "sqrtf" && V >= 0)
+ else if (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc::sqrtf))
return ConstantFoldFP(sqrt, V, Ty);
- else if (Name == "sinf")
+ else if (Name == "sinf" && TLI->has(LibFunc::sinf))
return ConstantFoldFP(sin, V, Ty);
break;
case 't':
- if (Name == "tan")
+ if (Name == "tan" && TLI->has(LibFunc::tan))
return ConstantFoldFP(tan, V, Ty);
- else if (Name == "tanh")
+ else if (Name == "tanh" && TLI->has(LibFunc::tanh))
return ConstantFoldFP(tanh, V, Ty);
break;
default:
@@ -1277,10 +1290,6 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
return ConstantInt::get(F->getContext(), Op->getValue().byteSwap());
case Intrinsic::ctpop:
return ConstantInt::get(Ty, Op->getValue().countPopulation());
- case Intrinsic::cttz:
- return ConstantInt::get(Ty, Op->getValue().countTrailingZeros());
- case Intrinsic::ctlz:
- return ConstantInt::get(Ty, Op->getValue().countLeadingZeros());
case Intrinsic::convert_from_fp16: {
APFloat Val(Op->getValue());
@@ -1337,16 +1346,21 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
if (Op2->getType() != Op1->getType())
return 0;
-
+
double Op2V = Ty->isFloatTy() ?
(double)Op2->getValueAPF().convertToFloat():
Op2->getValueAPF().convertToDouble();
- if (Name == "pow")
+ if (F->getIntrinsicID() == Intrinsic::pow) {
return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
- if (Name == "fmod")
+ }
+ if (!TLI)
+ return 0;
+ if (Name == "pow" && TLI->has(LibFunc::pow))
+ return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
+ if (Name == "fmod" && TLI->has(LibFunc::fmod))
return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
- if (Name == "atan2")
+ if (Name == "atan2" && TLI->has(LibFunc::atan2))
return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
} else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
if (F->getIntrinsicID() == Intrinsic::powi && Ty->isFloatTy())
@@ -1361,7 +1375,6 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
return 0;
}
-
if (ConstantInt *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
if (ConstantInt *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
switch (F->getIntrinsicID()) {
@@ -1401,6 +1414,14 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
};
return ConstantStruct::get(cast<StructType>(F->getReturnType()), Ops);
}
+ case Intrinsic::cttz:
+ // FIXME: This should check for Op2 == 1, and become unreachable if
+ // Op1 == 0.
+ return ConstantInt::get(Ty, Op1->getValue().countTrailingZeros());
+ case Intrinsic::ctlz:
+ // FIXME: This should check for Op2 == 1, and become unreachable if
+ // Op1 == 0.
+ return ConstantInt::get(Ty, Op1->getValue().countLeadingZeros());
}
}
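Net effect of the ConstantFolding.cpp changes above: the constant folders now thread an optional TargetLibraryInfo through every entry point, and calls to libm-style functions (sin, cos, pow, fmod, ...) are only folded when TLI reports that the target library actually provides them. A minimal caller sketch, assuming the usual header locations of this era (llvm/Analysis/ConstantFolding.h, plus the llvm/Target/TargetLibraryInfo.h include added by this patch) and using a hypothetical helper name of our own (tryFoldLibCall), not anything introduced by the patch itself:

    #include "llvm/Analysis/ConstantFolding.h"
    #include "llvm/Target/TargetLibraryInfo.h"
    using namespace llvm;

    // Hypothetical helper (not part of this patch): try to fold a call to F
    // whose operands Ops are all constants, passing the TargetLibraryInfo
    // through so that library calls are only folded when TLI->has(...) holds.
    // Passing a null TLI disables folding of library calls but still allows
    // intrinsics such as llvm.pow to be folded, as in the new code above.
    static Constant *tryFoldLibCall(Function *F, ArrayRef<Constant *> Ops,
                                    const TargetLibraryInfo *TLI) {
      if (F && canConstantFoldCallTo(F))
        return ConstantFoldCall(F, Ops, TLI);
      return 0; // not a foldable call
    }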
diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp
index ed3e8f4..85dcc46 100644
--- a/lib/Analysis/DIBuilder.cpp
+++ b/lib/Analysis/DIBuilder.cpp
@@ -777,7 +777,7 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context,
ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ NULL,
ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
Fn,
diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt
index eae83fd..8ffef29 100644
--- a/lib/Analysis/IPA/CMakeLists.txt
+++ b/lib/Analysis/IPA/CMakeLists.txt
@@ -5,9 +5,3 @@ add_llvm_library(LLVMipa
GlobalsModRef.cpp
IPA.cpp
)
-
-add_llvm_library_dependencies(LLVMipa
- LLVMAnalysis
- LLVMCore
- LLVMSupport
- )
diff --git a/lib/Analysis/IPA/LLVMBuild.txt b/lib/Analysis/IPA/LLVMBuild.txt
index fb16278..980e918 100644
--- a/lib/Analysis/IPA/LLVMBuild.txt
+++ b/lib/Analysis/IPA/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = IPA
parent = Libraries
library_name = ipa
required_libraries = Analysis Core Support
-
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 2f41f72..f1cfd6c 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -38,22 +38,25 @@ STATISTIC(NumFactor , "Number of factorizations");
STATISTIC(NumReassoc, "Number of reassociations");
static Value *SimplifyAndInst(Value *, Value *, const TargetData *,
- const DominatorTree *, unsigned);
+ const TargetLibraryInfo *, const DominatorTree *,
+ unsigned);
static Value *SimplifyBinOp(unsigned, Value *, Value *, const TargetData *,
- const DominatorTree *, unsigned);
+ const TargetLibraryInfo *, const DominatorTree *,
+ unsigned);
static Value *SimplifyCmpInst(unsigned, Value *, Value *, const TargetData *,
- const DominatorTree *, unsigned);
+ const TargetLibraryInfo *, const DominatorTree *,
+ unsigned);
static Value *SimplifyOrInst(Value *, Value *, const TargetData *,
- const DominatorTree *, unsigned);
+ const TargetLibraryInfo *, const DominatorTree *,
+ unsigned);
static Value *SimplifyXorInst(Value *, Value *, const TargetData *,
- const DominatorTree *, unsigned);
+ const TargetLibraryInfo *, const DominatorTree *,
+ unsigned);
/// getFalse - For a boolean type, or a vector of boolean type, return false, or
/// a vector with every element false, as appropriate for the type.
static Constant *getFalse(Type *Ty) {
- assert((Ty->isIntegerTy(1) ||
- (Ty->isVectorTy() &&
- cast<VectorType>(Ty)->getElementType()->isIntegerTy(1))) &&
+ assert(Ty->getScalarType()->isIntegerTy(1) &&
"Expected i1 type or a vector of i1!");
return Constant::getNullValue(Ty);
}
@@ -61,9 +64,7 @@ static Constant *getFalse(Type *Ty) {
/// getTrue - For a boolean type, or a vector of boolean type, return true, or
/// a vector with every element true, as appropriate for the type.
static Constant *getTrue(Type *Ty) {
- assert((Ty->isIntegerTy(1) ||
- (Ty->isVectorTy() &&
- cast<VectorType>(Ty)->getElementType()->isIntegerTy(1))) &&
+ assert(Ty->getScalarType()->isIntegerTy(1) &&
"Expected i1 type or a vector of i1!");
return Constant::getAllOnesValue(Ty);
}
@@ -109,7 +110,8 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
/// Returns the simplified value, or null if no simplification was performed.
static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
unsigned OpcToExpand, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
+ const TargetLibraryInfo *TLI, const DominatorTree *DT,
+ unsigned MaxRecurse) {
Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand;
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
@@ -121,8 +123,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
// It does! Try turning it into "(A op C) op' (B op C)".
Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
// Do "A op C" and "B op C" both simplify?
- if (Value *L = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse))
- if (Value *R = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) {
+ if (Value *L = SimplifyBinOp(Opcode, A, C, TD, TLI, DT, MaxRecurse))
+ if (Value *R = SimplifyBinOp(Opcode, B, C, TD, TLI, DT, MaxRecurse)) {
// They do! Return "L op' R" if it simplifies or is already available.
// If "L op' R" equals "A op' B" then "L op' R" is just the LHS.
if ((L == A && R == B) || (Instruction::isCommutative(OpcodeToExpand)
@@ -131,7 +133,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return LHS;
}
// Otherwise return "L op' R" if it simplifies.
- if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT,
+ if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, TLI, DT,
MaxRecurse)) {
++NumExpand;
return V;
@@ -145,8 +147,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
// It does! Try turning it into "(A op B) op' (A op C)".
Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
// Do "A op B" and "A op C" both simplify?
- if (Value *L = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse))
- if (Value *R = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse)) {
+ if (Value *L = SimplifyBinOp(Opcode, A, B, TD, TLI, DT, MaxRecurse))
+ if (Value *R = SimplifyBinOp(Opcode, A, C, TD, TLI, DT, MaxRecurse)) {
// They do! Return "L op' R" if it simplifies or is already available.
// If "L op' R" equals "B op' C" then "L op' R" is just the RHS.
if ((L == B && R == C) || (Instruction::isCommutative(OpcodeToExpand)
@@ -155,7 +157,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return RHS;
}
// Otherwise return "L op' R" if it simplifies.
- if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT,
+ if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, TLI, DT,
MaxRecurse)) {
++NumExpand;
return V;
@@ -171,8 +173,10 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
/// OpCodeToExtract is Mul then this tries to turn "(A*B)+(A*C)" into "A*(B+C)".
/// Returns the simplified value, or null if no simplification was performed.
static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- unsigned OpcToExtract, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
+ unsigned OpcToExtract, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
+ unsigned MaxRecurse) {
Instruction::BinaryOps OpcodeToExtract = (Instruction::BinaryOps)OpcToExtract;
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
@@ -196,7 +200,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
Value *DD = A == C ? D : C;
// Form "A op' (B op DD)" if it simplifies completely.
// Does "B op DD" simplify?
- if (Value *V = SimplifyBinOp(Opcode, B, DD, TD, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, B, DD, TD, TLI, DT, MaxRecurse)) {
// It does! Return "A op' V" if it simplifies or is already available.
// If V equals B then "A op' V" is just the LHS. If V equals DD then
// "A op' V" is just the RHS.
@@ -205,7 +209,8 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return V == B ? LHS : RHS;
}
// Otherwise return "A op' V" if it simplifies.
- if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, TD, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, TD, TLI, DT,
+ MaxRecurse)) {
++NumFactor;
return W;
}
@@ -219,7 +224,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
Value *CC = B == D ? C : D;
// Form "(A op CC) op' B" if it simplifies completely..
// Does "A op CC" simplify?
- if (Value *V = SimplifyBinOp(Opcode, A, CC, TD, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, A, CC, TD, TLI, DT, MaxRecurse)) {
// It does! Return "V op' B" if it simplifies or is already available.
// If V equals A then "V op' B" is just the LHS. If V equals CC then
// "V op' B" is just the RHS.
@@ -228,7 +233,8 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return V == A ? LHS : RHS;
}
// Otherwise return "V op' B" if it simplifies.
- if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, TD, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, TD, TLI, DT,
+ MaxRecurse)) {
++NumFactor;
return W;
}
@@ -242,6 +248,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
/// operations. Returns the simpler value, or null if none was found.
static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT,
unsigned MaxRecurse) {
Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc;
@@ -261,12 +268,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
Value *C = RHS;
// Does "B op C" simplify?
- if (Value *V = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, B, C, TD, TLI, DT, MaxRecurse)) {
// It does! Return "A op V" if it simplifies or is already available.
// If V equals B then "A op V" is just the LHS.
if (V == B) return LHS;
// Otherwise return "A op V" if it simplifies.
- if (Value *W = SimplifyBinOp(Opcode, A, V, TD, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(Opcode, A, V, TD, TLI, DT, MaxRecurse)) {
++NumReassoc;
return W;
}
@@ -280,12 +287,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
Value *C = Op1->getOperand(1);
// Does "A op B" simplify?
- if (Value *V = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, A, B, TD, TLI, DT, MaxRecurse)) {
// It does! Return "V op C" if it simplifies or is already available.
// If V equals B then "V op C" is just the RHS.
if (V == B) return RHS;
// Otherwise return "V op C" if it simplifies.
- if (Value *W = SimplifyBinOp(Opcode, V, C, TD, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(Opcode, V, C, TD, TLI, DT, MaxRecurse)) {
++NumReassoc;
return W;
}
@@ -303,12 +310,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
Value *C = RHS;
// Does "C op A" simplify?
- if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, C, A, TD, TLI, DT, MaxRecurse)) {
// It does! Return "V op B" if it simplifies or is already available.
// If V equals A then "V op B" is just the LHS.
if (V == A) return LHS;
// Otherwise return "V op B" if it simplifies.
- if (Value *W = SimplifyBinOp(Opcode, V, B, TD, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(Opcode, V, B, TD, TLI, DT, MaxRecurse)) {
++NumReassoc;
return W;
}
@@ -322,12 +329,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
Value *C = Op1->getOperand(1);
// Does "C op A" simplify?
- if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, C, A, TD, TLI, DT, MaxRecurse)) {
// It does! Return "B op V" if it simplifies or is already available.
// If V equals C then "B op V" is just the RHS.
if (V == C) return RHS;
// Otherwise return "B op V" if it simplifies.
- if (Value *W = SimplifyBinOp(Opcode, B, V, TD, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(Opcode, B, V, TD, TLI, DT, MaxRecurse)) {
++NumReassoc;
return W;
}
@@ -343,6 +350,7 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
/// Returns the common value if so, otherwise returns null.
static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT,
unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
@@ -361,11 +369,11 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
Value *TV;
Value *FV;
if (SI == LHS) {
- TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, TD, DT, MaxRecurse);
- FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, TD, DT, MaxRecurse);
+ TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, TD, TLI, DT, MaxRecurse);
+ FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, TD, TLI, DT, MaxRecurse);
} else {
- TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), TD, DT, MaxRecurse);
- FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), TD, DT, MaxRecurse);
+ TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), TD, TLI, DT, MaxRecurse);
+ FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), TD, TLI, DT, MaxRecurse);
}
// If they simplified to the same value, then return the common value.
@@ -417,6 +425,7 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
/// null.
static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
Value *RHS, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT,
unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
@@ -436,7 +445,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
// Now that we have "cmp select(Cond, TV, FV), RHS", analyse it.
// Does "cmp TV, RHS" simplify?
- Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, TD, DT, MaxRecurse);
+ Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, TD, TLI, DT, MaxRecurse);
if (TCmp == Cond) {
// It not only simplified, it simplified to the select condition. Replace
// it with 'true'.
@@ -450,7 +459,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
}
// Does "cmp FV, RHS" simplify?
- Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, TD, DT, MaxRecurse);
+ Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, TD, TLI, DT, MaxRecurse);
if (FCmp == Cond) {
// It not only simplified, it simplified to the select condition. Replace
// it with 'false'.
@@ -471,19 +480,19 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
// is equal to "Cond && TCmp". This also catches the case when the false
// value simplified to false and the true value to true, returning "Cond".
if (match(FCmp, m_Zero()))
- if (Value *V = SimplifyAndInst(Cond, TCmp, TD, DT, MaxRecurse))
+ if (Value *V = SimplifyAndInst(Cond, TCmp, TD, TLI, DT, MaxRecurse))
return V;
// If the true value simplified to true, then the result of the compare
// is equal to "Cond || FCmp".
if (match(TCmp, m_One()))
- if (Value *V = SimplifyOrInst(Cond, FCmp, TD, DT, MaxRecurse))
+ if (Value *V = SimplifyOrInst(Cond, FCmp, TD, TLI, DT, MaxRecurse))
return V;
// Finally, if the false value simplified to true and the true value to
// false, then the result of the compare is equal to "!Cond".
if (match(FCmp, m_One()) && match(TCmp, m_Zero()))
if (Value *V =
SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()),
- TD, DT, MaxRecurse))
+ TD, TLI, DT, MaxRecurse))
return V;
return 0;
@@ -494,7 +503,9 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
/// it on the incoming phi values yields the same result for every value. If so
/// returns the common value, otherwise returns null.
static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
@@ -521,8 +532,8 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
// If the incoming value is the phi node itself, it can safely be skipped.
if (Incoming == PI) continue;
Value *V = PI == LHS ?
- SimplifyBinOp(Opcode, Incoming, RHS, TD, DT, MaxRecurse) :
- SimplifyBinOp(Opcode, LHS, Incoming, TD, DT, MaxRecurse);
+ SimplifyBinOp(Opcode, Incoming, RHS, TD, TLI, DT, MaxRecurse) :
+ SimplifyBinOp(Opcode, LHS, Incoming, TD, TLI, DT, MaxRecurse);
// If the operation failed to simplify, or simplified to a different value
// to previously, then give up.
if (!V || (CommonValue && V != CommonValue))
@@ -538,7 +549,9 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
/// incoming phi values yields the same result every time. If so returns the
/// common result, otherwise returns null.
static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
@@ -562,7 +575,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
Value *Incoming = PI->getIncomingValue(i);
// If the incoming value is the phi node itself, it can safely be skipped.
if (Incoming == PI) continue;
- Value *V = SimplifyCmpInst(Pred, Incoming, RHS, TD, DT, MaxRecurse);
+ Value *V = SimplifyCmpInst(Pred, Incoming, RHS, TD, TLI, DT, MaxRecurse);
// If the operation failed to simplify, or simplified to a different value
// to previously, then give up.
if (!V || (CommonValue && V != CommonValue))
@@ -576,13 +589,15 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
/// SimplifyAddInst - Given operands for an Add, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD, const DominatorTree *DT,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(),
- Ops, TD);
+ Ops, TD, TLI);
}
// Canonicalize the constant to the RHS.
@@ -612,17 +627,17 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
/// i1 add -> xor.
if (MaxRecurse && Op0->getType()->isIntegerTy(1))
- if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyXorInst(Op0, Op1, TD, TLI, DT, MaxRecurse-1))
return V;
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, TD, DT,
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, TD, TLI, DT,
MaxRecurse))
return V;
// Mul distributes over Add. Try some generic simplifications based on this.
if (Value *V = FactorizeBinOp(Instruction::Add, Op0, Op1, Instruction::Mul,
- TD, DT, MaxRecurse))
+ TD, TLI, DT, MaxRecurse))
return V;
// Threading Add over selects and phi nodes is pointless, so don't bother.
@@ -638,20 +653,23 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
}
Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+ const TargetData *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, TD, TLI, DT, RecursionLimit);
}
/// SimplifySubInst - Given operands for a Sub, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD, const DominatorTree *DT,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0))
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Sub, CLHS->getType(),
- Ops, TD);
+ Ops, TD, TLI);
}
// X - undef -> undef
@@ -679,18 +697,18 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
Value *Y = 0, *Z = Op1;
if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z
// See if "V === Y - Z" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, TD, TLI, DT, MaxRecurse-1))
// It does! Now see if "X + V" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Add, X, V, TD, DT,
+ if (Value *W = SimplifyBinOp(Instruction::Add, X, V, TD, TLI, DT,
MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
}
// See if "V === X - Z" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, TLI, DT, MaxRecurse-1))
// It does! Now see if "Y + V" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, TD, DT,
+ if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, TD, TLI, DT,
MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
@@ -703,18 +721,18 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
X = Op0;
if (MaxRecurse && match(Op1, m_Add(m_Value(Y), m_Value(Z)))) { // X - (Y + Z)
// See if "V === X - Y" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, TD, TLI, DT, MaxRecurse-1))
// It does! Now see if "V - Z" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, TD, DT,
+ if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, TD, TLI, DT,
MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
}
// See if "V === X - Z" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, TLI, DT, MaxRecurse-1))
// It does! Now see if "V - Y" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, TD, DT,
+ if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, TD, TLI, DT,
MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
@@ -727,9 +745,9 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
Z = Op0;
if (MaxRecurse && match(Op1, m_Sub(m_Value(X), m_Value(Y)))) // Z - (X - Y)
// See if "V === Z - X" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, TD, TLI, DT, MaxRecurse-1))
// It does! Now see if "V + Y" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, TD, DT,
+ if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, TD, TLI, DT,
MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
@@ -738,12 +756,12 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
// Mul distributes over Sub. Try some generic simplifications based on this.
if (Value *V = FactorizeBinOp(Instruction::Sub, Op0, Op1, Instruction::Mul,
- TD, DT, MaxRecurse))
+ TD, TLI, DT, MaxRecurse))
return V;
// i1 sub -> xor.
if (MaxRecurse && Op0->getType()->isIntegerTy(1))
- if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyXorInst(Op0, Op1, TD, TLI, DT, MaxRecurse-1))
return V;
// Threading Sub over selects and phi nodes is pointless, so don't bother.
@@ -759,19 +777,22 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
}
Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, TD, TLI, DT, RecursionLimit);
}
/// SimplifyMulInst - Given operands for a Mul, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT, unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Mul, CLHS->getType(),
- Ops, TD);
+ Ops, TD, TLI);
}
// Canonicalize the constant to the RHS.
@@ -802,30 +823,30 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
// i1 mul -> and.
if (MaxRecurse && Op0->getType()->isIntegerTy(1))
- if (Value *V = SimplifyAndInst(Op0, Op1, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyAndInst(Op0, Op1, TD, TLI, DT, MaxRecurse-1))
return V;
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, TD, DT,
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, TD, TLI, DT,
MaxRecurse))
return V;
// Mul distributes over Add. Try some generic simplifications based on this.
if (Value *V = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add,
- TD, DT, MaxRecurse))
+ TD, TLI, DT, MaxRecurse))
return V;
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, TD, DT,
+ if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, TD, TLI, DT,
MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, TD, DT,
+ if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, TD, TLI, DT,
MaxRecurse))
return V;
@@ -833,19 +854,20 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
}
Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyMulInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyMulInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
}
/// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const TargetData *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, unsigned MaxRecurse) {
if (Constant *C0 = dyn_cast<Constant>(Op0)) {
if (Constant *C1 = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { C0, C1 };
- return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD);
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD, TLI);
}
}
@@ -898,13 +920,15 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, TLI, DT,
+ MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, TLI, DT,
+ MaxRecurse))
return V;
return 0;
@@ -913,34 +937,41 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
/// SimplifySDivInst - Given operands for an SDiv, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT, unsigned MaxRecurse) {
- if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, TD, TLI, DT,
+ MaxRecurse))
return V;
return 0;
}
Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifySDivInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifySDivInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
}
/// SimplifyUDivInst - Given operands for a UDiv, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT, unsigned MaxRecurse) {
- if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, TD, TLI, DT,
+ MaxRecurse))
return V;
return 0;
}
Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyUDivInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyUDivInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
}
static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *,
+ const TargetLibraryInfo *,
const DominatorTree *, unsigned) {
// undef / X -> undef (the undef could be a snan).
if (match(Op0, m_Undef()))
@@ -954,19 +985,20 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *,
}
Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyFDivInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyFDivInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
}
/// SimplifyRem - Given operands for an SRem or URem, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const TargetData *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, unsigned MaxRecurse) {
if (Constant *C0 = dyn_cast<Constant>(Op0)) {
if (Constant *C1 = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { C0, C1 };
- return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD);
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD, TLI);
}
}
@@ -1001,13 +1033,13 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, TLI, DT, MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, TLI, DT, MaxRecurse))
return V;
return 0;
@@ -1016,35 +1048,43 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
/// SimplifySRemInst - Given operands for an SRem, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
- if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, TD, DT, MaxRecurse))
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, TD, TLI, DT, MaxRecurse))
return V;
return 0;
}
Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifySRemInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifySRemInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
}
/// SimplifyURemInst - Given operands for a URem, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
- if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, TD, DT, MaxRecurse))
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, TD, TLI, DT, MaxRecurse))
return V;
return 0;
}
Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyURemInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyURemInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
}
static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *,
- const DominatorTree *, unsigned) {
+ const TargetLibraryInfo *,
+ const DominatorTree *,
+ unsigned) {
// undef % X -> undef (the undef could be a snan).
if (match(Op0, m_Undef()))
return Op0;
@@ -1057,19 +1097,20 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *,
}
Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyFRemInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyFRemInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
}
/// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const TargetData *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, unsigned MaxRecurse) {
if (Constant *C0 = dyn_cast<Constant>(Op0)) {
if (Constant *C1 = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { C0, C1 };
- return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD);
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD, TLI);
}
}
@@ -1094,13 +1135,13 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, TLI, DT, MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, TLI, DT, MaxRecurse))
return V;
return 0;
@@ -1109,9 +1150,10 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
/// SimplifyShlInst - Given operands for an Shl, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
- if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, TD, DT, MaxRecurse))
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, TD, TLI, DT, MaxRecurse))
return V;
// undef << X -> 0
@@ -1127,16 +1169,19 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
}
Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+ const TargetData *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, TD, TLI, DT, RecursionLimit);
}
/// SimplifyLShrInst - Given operands for an LShr, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
- const TargetData *TD, const DominatorTree *DT,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
unsigned MaxRecurse) {
- if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, TD, TLI, DT, MaxRecurse))
return V;
// undef >>l X -> 0
@@ -1153,16 +1198,20 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
}
Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyLShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit);
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyLShrInst(Op0, Op1, isExact, TD, TLI, DT, RecursionLimit);
}
/// SimplifyAShrInst - Given operands for an AShr, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
- const TargetData *TD, const DominatorTree *DT,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
unsigned MaxRecurse) {
- if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, TD, TLI, DT, MaxRecurse))
return V;
// all ones >>a X -> all ones
@@ -1183,19 +1232,23 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
}
Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyAShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit);
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyAShrInst(Op0, Op1, isExact, TD, TLI, DT, RecursionLimit);
}
/// SimplifyAndInst - Given operands for an And, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
+static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
+ unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::And, CLHS->getType(),
- Ops, TD);
+ Ops, TD, TLI);
}
// Canonicalize the constant to the RHS.
@@ -1244,36 +1297,36 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
}
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, TD, DT,
- MaxRecurse))
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, TD, TLI,
+ DT, MaxRecurse))
return V;
// And distributes over Or. Try some generic simplifications based on this.
if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Or,
- TD, DT, MaxRecurse))
+ TD, TLI, DT, MaxRecurse))
return V;
// And distributes over Xor. Try some generic simplifications based on this.
if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Xor,
- TD, DT, MaxRecurse))
+ TD, TLI, DT, MaxRecurse))
return V;
// Or distributes over And. Try some generic simplifications based on this.
if (Value *V = FactorizeBinOp(Instruction::And, Op0, Op1, Instruction::Or,
- TD, DT, MaxRecurse))
+ TD, TLI, DT, MaxRecurse))
return V;
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, TD, DT,
- MaxRecurse))
+ if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, TD, TLI,
+ DT, MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, TD, DT,
+ if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, TD, TLI, DT,
MaxRecurse))
return V;
@@ -1281,19 +1334,21 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
}
Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyAndInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyAndInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
}
/// SimplifyOrInst - Given operands for an Or, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
+static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT, unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(),
- Ops, TD);
+ Ops, TD, TLI);
}
// Canonicalize the constant to the RHS.
@@ -1343,31 +1398,31 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
return Constant::getAllOnesValue(Op0->getType());
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, TD, DT,
- MaxRecurse))
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, TD, TLI,
+ DT, MaxRecurse))
return V;
// Or distributes over And. Try some generic simplifications based on this.
- if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And,
- TD, DT, MaxRecurse))
+ if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And, TD,
+ TLI, DT, MaxRecurse))
return V;
// And distributes over Or. Try some generic simplifications based on this.
if (Value *V = FactorizeBinOp(Instruction::Or, Op0, Op1, Instruction::And,
- TD, DT, MaxRecurse))
+ TD, TLI, DT, MaxRecurse))
return V;
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, TD, DT,
+ if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, TD, TLI, DT,
MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, TD, DT,
+ if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, TD, TLI, DT,
MaxRecurse))
return V;
@@ -1375,19 +1430,21 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
}
Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyOrInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyOrInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
}
/// SimplifyXorInst - Given operands for a Xor, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT, unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Xor, CLHS->getType(),
- Ops, TD);
+ Ops, TD, TLI);
}
// Canonicalize the constant to the RHS.
@@ -1412,13 +1469,13 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
return Constant::getAllOnesValue(Op0->getType());
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, TD, DT,
- MaxRecurse))
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, TD, TLI,
+ DT, MaxRecurse))
return V;
// And distributes over Xor. Try some generic simplifications based on this.
if (Value *V = FactorizeBinOp(Instruction::Xor, Op0, Op1, Instruction::And,
- TD, DT, MaxRecurse))
+ TD, TLI, DT, MaxRecurse))
return V;
// Threading Xor over selects and phi nodes is pointless, so don't bother.
@@ -1434,8 +1491,9 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
}
Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyXorInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyXorInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
}
static Type *GetCompareTy(Value *Op) {
@@ -1465,14 +1523,16 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
unsigned MaxRecurse) {
CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
if (Constant *CRHS = dyn_cast<Constant>(RHS))
- return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD);
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD, TLI);
// If we have a constant, make sure it is on the RHS.
std::swap(LHS, RHS);
@@ -1489,8 +1549,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
// Special case logic when the operands have i1 type.
- if (OpTy->isIntegerTy(1) || (OpTy->isVectorTy() &&
- cast<VectorType>(OpTy)->getElementType()->isIntegerTy(1))) {
+ if (OpTy->getScalarType()->isIntegerTy(1)) {
switch (Pred) {
default: break;
case ICmpInst::ICMP_EQ:
@@ -1671,13 +1730,13 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Transfer the cast to the constant.
if (Value *V = SimplifyICmpInst(Pred, SrcOp,
ConstantExpr::getIntToPtr(RHSC, SrcTy),
- TD, DT, MaxRecurse-1))
+ TD, TLI, DT, MaxRecurse-1))
return V;
} else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) {
if (RI->getOperand(0)->getType() == SrcTy)
// Compare without the cast.
if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
- TD, DT, MaxRecurse-1))
+ TD, TLI, DT, MaxRecurse-1))
return V;
}
}
@@ -1689,7 +1748,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
// Compare X and Y. Note that signed predicates become unsigned.
if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
- SrcOp, RI->getOperand(0), TD, DT,
+ SrcOp, RI->getOperand(0), TD, TLI, DT,
MaxRecurse-1))
return V;
}
@@ -1705,7 +1764,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// also a case of comparing two zero-extended values.
if (RExt == CI && MaxRecurse)
if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
- SrcOp, Trunc, TD, DT, MaxRecurse-1))
+ SrcOp, Trunc, TD, TLI, DT, MaxRecurse-1))
return V;
// Otherwise the upper bits of LHS are zero while RHS has a non-zero bit
@@ -1750,7 +1809,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
// Compare X and Y. Note that the predicate does not change.
if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
- TD, DT, MaxRecurse-1))
+ TD, TLI, DT, MaxRecurse-1))
return V;
}
// Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended
@@ -1764,7 +1823,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// If the re-extended constant didn't change then this is effectively
// also a case of comparing two sign-extended values.
if (RExt == CI && MaxRecurse)
- if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, TD, DT,
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, TD, TLI, DT,
MaxRecurse-1))
return V;
@@ -1800,7 +1859,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (MaxRecurse)
if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp,
Constant::getNullValue(SrcTy),
- TD, DT, MaxRecurse-1))
+ TD, TLI, DT, MaxRecurse-1))
return V;
break;
case ICmpInst::ICMP_ULT:
@@ -1809,7 +1868,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (MaxRecurse)
if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp,
Constant::getNullValue(SrcTy),
- TD, DT, MaxRecurse-1))
+ TD, TLI, DT, MaxRecurse-1))
return V;
break;
}
@@ -1843,14 +1902,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if ((A == RHS || B == RHS) && NoLHSWrapProblem)
if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A,
Constant::getNullValue(RHS->getType()),
- TD, DT, MaxRecurse-1))
+ TD, TLI, DT, MaxRecurse-1))
return V;
// icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
if ((C == LHS || D == LHS) && NoRHSWrapProblem)
if (Value *V = SimplifyICmpInst(Pred,
Constant::getNullValue(LHS->getType()),
- C == LHS ? D : C, TD, DT, MaxRecurse-1))
+ C == LHS ? D : C, TD, TLI, DT, MaxRecurse-1))
return V;
// icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow.
@@ -1859,7 +1918,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Determine Y and Z in the form icmp (X+Y), (X+Z).
Value *Y = (A == C || A == D) ? B : A;
Value *Z = (C == A || C == B) ? D : C;
- if (Value *V = SimplifyICmpInst(Pred, Y, Z, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(Pred, Y, Z, TD, TLI, DT, MaxRecurse-1))
return V;
}
}
@@ -1942,7 +2001,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (!LBO->isExact() || !RBO->isExact())
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
- RBO->getOperand(0), TD, DT, MaxRecurse-1))
+ RBO->getOperand(0), TD, TLI, DT, MaxRecurse-1))
return V;
break;
case Instruction::Shl: {
@@ -1953,7 +2012,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (!NSW && ICmpInst::isSigned(Pred))
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
- RBO->getOperand(0), TD, DT, MaxRecurse-1))
+ RBO->getOperand(0), TD, TLI, DT, MaxRecurse-1))
return V;
break;
}
@@ -2007,7 +2066,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A EqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(EqP, A, B, TD, TLI, DT, MaxRecurse-1))
return V;
break;
case CmpInst::ICMP_NE:
@@ -2021,7 +2080,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A InvEqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, TLI, DT, MaxRecurse-1))
return V;
break;
}
@@ -2077,7 +2136,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A EqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(EqP, A, B, TD, TLI, DT, MaxRecurse-1))
return V;
break;
case CmpInst::ICMP_NE:
@@ -2091,7 +2150,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A InvEqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, TLI, DT, MaxRecurse-1))
return V;
break;
}
@@ -2151,34 +2210,38 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// If the comparison is with the result of a select instruction, check whether
// comparing with either branch of the select always yields the same value.
if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
- if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, TLI, DT, MaxRecurse))
return V;
// If the comparison is with the result of a phi instruction, check whether
// doing the compare with each incoming phi value yields a common result.
if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
- if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, TLI, DT, MaxRecurse))
return V;
return 0;
}
Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyICmpInst(Predicate, LHS, RHS, TD, TLI, DT, RecursionLimit);
}
/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
unsigned MaxRecurse) {
CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!");
if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
if (Constant *CRHS = dyn_cast<Constant>(RHS))
- return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD);
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD, TLI);
// If we have a constant, make sure it is on the RHS.
std::swap(LHS, RHS);
@@ -2246,21 +2309,23 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// If the comparison is with the result of a select instruction, check whether
// comparing with either branch of the select always yields the same value.
if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
- if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, TLI, DT, MaxRecurse))
return V;
// If the comparison is with the result of a phi instruction, check whether
// doing the compare with each incoming phi value yields a common result.
if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
- if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, TLI, DT, MaxRecurse))
return V;
return 0;
}
Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyFCmpInst(Predicate, LHS, RHS, TD, TLI, DT, RecursionLimit);
}
/// SimplifySelectInst - Given operands for a SelectInst, see if we can fold
@@ -2291,10 +2356,13 @@ Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal,
/// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can
/// fold the result. If not, this returns null.
-Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops,
- const TargetData *TD, const DominatorTree *) {
+Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const TargetData *TD,
+ const DominatorTree *) {
// The type of the GEP pointer operand.
- PointerType *PtrTy = cast<PointerType>(Ops[0]->getType());
+ PointerType *PtrTy = dyn_cast<PointerType>(Ops[0]->getType());
+ // The GEP pointer operand is not a pointer, it's a vector of pointers.
+ if (!PtrTy)
+ return 0;
// getelementptr P -> P.
if (Ops.size() == 1)
@@ -2392,62 +2460,76 @@ static Value *SimplifyPHINode(PHINode *PN, const DominatorTree *DT) {
return CommonValue;
}
-
//=== Helper functions for higher up the class hierarchy.
/// SimplifyBinOp - Given operands for a BinaryOperator, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
unsigned MaxRecurse) {
switch (Opcode) {
case Instruction::Add:
return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
- TD, DT, MaxRecurse);
+ TD, TLI, DT, MaxRecurse);
case Instruction::Sub:
return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
- TD, DT, MaxRecurse);
- case Instruction::Mul: return SimplifyMulInst (LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::SRem: return SimplifySRemInst(LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::URem: return SimplifyURemInst(LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, TD, DT, MaxRecurse);
+ TD, TLI, DT, MaxRecurse);
+ case Instruction::Mul: return SimplifyMulInst (LHS, RHS, TD, TLI, DT,
+ MaxRecurse);
+ case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, TD, TLI, DT,
+ MaxRecurse);
+ case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, TD, TLI, DT,
+ MaxRecurse);
+ case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, TD, TLI, DT,
+ MaxRecurse);
+ case Instruction::SRem: return SimplifySRemInst(LHS, RHS, TD, TLI, DT,
+ MaxRecurse);
+ case Instruction::URem: return SimplifyURemInst(LHS, RHS, TD, TLI, DT,
+ MaxRecurse);
+ case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, TD, TLI, DT,
+ MaxRecurse);
case Instruction::Shl:
return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
- TD, DT, MaxRecurse);
+ TD, TLI, DT, MaxRecurse);
case Instruction::LShr:
- return SimplifyLShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse);
+ return SimplifyLShrInst(LHS, RHS, /*isExact*/false, TD, TLI, DT,
+ MaxRecurse);
case Instruction::AShr:
- return SimplifyAShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse);
- case Instruction::And: return SimplifyAndInst(LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::Or: return SimplifyOrInst (LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::Xor: return SimplifyXorInst(LHS, RHS, TD, DT, MaxRecurse);
+ return SimplifyAShrInst(LHS, RHS, /*isExact*/false, TD, TLI, DT,
+ MaxRecurse);
+ case Instruction::And: return SimplifyAndInst(LHS, RHS, TD, TLI, DT,
+ MaxRecurse);
+ case Instruction::Or: return SimplifyOrInst (LHS, RHS, TD, TLI, DT,
+ MaxRecurse);
+ case Instruction::Xor: return SimplifyXorInst(LHS, RHS, TD, TLI, DT,
+ MaxRecurse);
default:
if (Constant *CLHS = dyn_cast<Constant>(LHS))
if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
Constant *COps[] = {CLHS, CRHS};
- return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, TD);
+ return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, TD, TLI);
}
// If the operation is associative, try some generic simplifications.
if (Instruction::isAssociative(Opcode))
- if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, TD, DT,
+ if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, TD, TLI, DT,
MaxRecurse))
return V;
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
- if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, TD, DT,
+ if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, TD, TLI, DT,
MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
- if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, TD, TLI, DT,
+ MaxRecurse))
return V;
return 0;
@@ -2455,23 +2537,27 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
}
Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyBinOp(Opcode, LHS, RHS, TD, DT, RecursionLimit);
+ const TargetData *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyBinOp(Opcode, LHS, RHS, TD, TLI, DT, RecursionLimit);
}
/// SimplifyCmpInst - Given operands for a CmpInst, see if we can
/// fold the result.
static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
unsigned MaxRecurse) {
if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate))
- return SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse);
- return SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse);
+ return SimplifyICmpInst(Predicate, LHS, RHS, TD, TLI, DT, MaxRecurse);
+ return SimplifyFCmpInst(Predicate, LHS, RHS, TD, TLI, DT, MaxRecurse);
}
Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+ const TargetData *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyCmpInst(Predicate, LHS, RHS, TD, TLI, DT, RecursionLimit);
}
static Value *SimplifyCallInst(CallInst *CI) {
@@ -2485,78 +2571,79 @@ static Value *SimplifyCallInst(CallInst *CI) {
/// SimplifyInstruction - See if we can compute a simplified version of this
/// instruction. If not, this returns null.
Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
Value *Result;
switch (I->getOpcode()) {
default:
- Result = ConstantFoldInstruction(I, TD);
+ Result = ConstantFoldInstruction(I, TD, TLI);
break;
case Instruction::Add:
Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
- TD, DT);
+ TD, TLI, DT);
break;
case Instruction::Sub:
Result = SimplifySubInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
- TD, DT);
+ TD, TLI, DT);
break;
case Instruction::Mul:
- Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::SDiv:
- Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::UDiv:
- Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::FDiv:
- Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::SRem:
- Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::URem:
- Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::FRem:
- Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::Shl:
Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
- TD, DT);
+ TD, TLI, DT);
break;
case Instruction::LShr:
Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->isExact(),
- TD, DT);
+ TD, TLI, DT);
break;
case Instruction::AShr:
Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->isExact(),
- TD, DT);
+ TD, TLI, DT);
break;
case Instruction::And:
- Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::Or:
- Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::Xor:
- Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::ICmp:
Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(),
- I->getOperand(0), I->getOperand(1), TD, DT);
+ I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::FCmp:
Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(),
- I->getOperand(0), I->getOperand(1), TD, DT);
+ I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::Select:
Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1),
@@ -2596,6 +2683,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD,
///
void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To,
const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
assert(From != To && "ReplaceAndSimplifyAllUses(X,X) is not valid!");
@@ -2620,12 +2708,12 @@ void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To,
// SimplifyInstruction.
AssertingVH<> UserHandle(User);
- SimplifiedVal = SimplifyInstruction(User, TD, DT);
+ SimplifiedVal = SimplifyInstruction(User, TD, TLI, DT);
if (SimplifiedVal == 0) continue;
}
// Recursively simplify this user to the new value.
- ReplaceAndSimplifyAllUses(User, SimplifiedVal, TD, DT);
+ ReplaceAndSimplifyAllUses(User, SimplifiedVal, TD, TLI, DT);
From = dyn_cast_or_null<Instruction>((Value*)FromHandle);
To = ToHandle;
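
For context, the hunks above thread a TargetLibraryInfo pointer through the public simplification entry points alongside TargetData and the DominatorTree. A minimal caller-side sketch of the updated interface follows; the helper name simplifyAll and the driver loop are illustrative assumptions, and only the SimplifyInstruction signature is taken from the patch itself.

#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Function.h"
#include "llvm/Instruction.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLibraryInfo.h"

using namespace llvm;

// Illustrative helper (not part of the patch): run the TLI-aware
// SimplifyInstruction over every instruction in a function.
static bool simplifyAll(Function &F, const TargetData *TD,
                        const TargetLibraryInfo *TLI, const DominatorTree *DT) {
  bool Changed = false;
  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
    for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
      Instruction *Inst = I++;
      // After this patch the call site also passes TLI, so constant folding
      // of known library calls can participate in simplification.
      if (Value *V = SimplifyInstruction(Inst, TD, TLI, DT)) {
        Inst->replaceAllUsesWith(V);
        Inst->eraseFromParent();
        Changed = true;
      }
    }
  return Changed;
}
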
diff --git a/lib/Analysis/LLVMBuild.txt b/lib/Analysis/LLVMBuild.txt
index 92f199b..a8a8079 100644
--- a/lib/Analysis/LLVMBuild.txt
+++ b/lib/Analysis/LLVMBuild.txt
@@ -15,9 +15,11 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = IPA
+
[component_0]
type = Library
name = Analysis
parent = Libraries
required_libraries = Core Support Target
-
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index f80595c..d27d911 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -20,6 +20,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
@@ -33,7 +34,10 @@
using namespace llvm;
char LazyValueInfo::ID = 0;
-INITIALIZE_PASS(LazyValueInfo, "lazy-value-info",
+INITIALIZE_PASS_BEGIN(LazyValueInfo, "lazy-value-info",
+ "Lazy Value Information Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(LazyValueInfo, "lazy-value-info",
"Lazy Value Information Analysis", false, true)
namespace llvm {
@@ -61,10 +65,10 @@ class LVILatticeVal {
constant,
/// notconstant - This Value is known to not have the specified value.
notconstant,
-
+
/// constantrange - The Value falls within this range.
constantrange,
-
+
/// overdefined - This value is not known to be constant, and we know that
/// it has a value.
overdefined
@@ -207,7 +211,7 @@ public:
// Unless we can prove that the two Constants are different, we must
// move to overdefined.
- // FIXME: use TargetData for smarter constant folding.
+ // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding.
if (ConstantInt *Res = dyn_cast<ConstantInt>(
ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
getConstant(),
@@ -233,7 +237,7 @@ public:
// Unless we can prove that the two Constants are different, we must
// move to overdefined.
- // FIXME: use TargetData for smarter constant folding.
+ // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding.
if (ConstantInt *Res = dyn_cast<ConstantInt>(
ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
getNotConstant(),
@@ -367,7 +371,11 @@ namespace {
/// for cache updating.
typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy;
DenseSet<OverDefinedPairTy> OverDefinedCache;
-
+
+ /// SeenBlocks - Keep track of all blocks that we have ever seen, so we
+ /// don't spend time removing unused blocks from our caches.
+ DenseSet<AssertingVH<BasicBlock> > SeenBlocks;
+
/// BlockValueStack - This stack holds the state of the value solver
/// during a query. It basically emulates the callstack of the naive
/// recursive value lookup process.
@@ -438,6 +446,7 @@ namespace {
/// clear - Empty the cache.
void clear() {
+ SeenBlocks.clear();
ValueCache.clear();
OverDefinedCache.clear();
}
@@ -466,6 +475,12 @@ void LVIValueHandle::deleted() {
}
void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
+ // Shortcut if we have never seen this block.
+ DenseSet<AssertingVH<BasicBlock> >::iterator I = SeenBlocks.find(BB);
+ if (I == SeenBlocks.end())
+ return;
+ SeenBlocks.erase(I);
+
SmallVector<OverDefinedPairTy, 4> ToErase;
for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(),
E = OverDefinedCache.end(); I != E; ++I) {
@@ -505,6 +520,7 @@ LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) {
if (Constant *VC = dyn_cast<Constant>(Val))
return LVILatticeVal::get(VC);
+ SeenBlocks.insert(BB);
return lookup(Val)[BB];
}
@@ -513,6 +529,7 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
return true;
ValueCacheEntryTy &Cache = lookup(Val);
+ SeenBlocks.insert(BB);
LVILatticeVal &BBLV = Cache[BB];
// OverDefinedCacheUpdater is a helper object that will update
@@ -1007,12 +1024,19 @@ static LazyValueInfoCache &getCache(void *&PImpl) {
bool LazyValueInfo::runOnFunction(Function &F) {
if (PImpl)
getCache(PImpl).clear();
-
+
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
// Fully lazy.
return false;
}
+void LazyValueInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<TargetLibraryInfo>();
+}
+
void LazyValueInfo::releaseMemory() {
// If the cache was allocated, free it.
if (PImpl) {
@@ -1061,7 +1085,8 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
// If we know the value is a constant, evaluate the conditional.
Constant *Res = 0;
if (Result.isConstant()) {
- Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD);
+ Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD,
+ TLI);
if (ConstantInt *ResCI = dyn_cast<ConstantInt>(Res))
return ResCI->isZero() ? False : True;
return Unknown;
@@ -1102,13 +1127,15 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
if (Pred == ICmpInst::ICMP_EQ) {
// !C1 == C -> false iff C1 == C.
Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
- Result.getNotConstant(), C, TD);
+ Result.getNotConstant(), C, TD,
+ TLI);
if (Res->isNullValue())
return False;
} else if (Pred == ICmpInst::ICMP_NE) {
// !C1 != C -> true iff C1 == C.
Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
- Result.getNotConstant(), C, TD);
+ Result.getNotConstant(), C, TD,
+ TLI);
if (Res->isNullValue())
return True;
}
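
The LazyValueInfo hunks above all apply one pattern: the pass declares a TargetLibraryInfo dependency (INITIALIZE_PASS_DEPENDENCY plus addRequired), caches the TLI pointer in runOnFunction, and threads it, together with TargetData, into the ConstantFolding entry points. A minimal sketch of that pattern in a stand-alone, hypothetical pass (MyFoldPass and its body are illustrative only, not part of this patch; registration boilerplate is omitted):

#include "llvm/Pass.h"
#include "llvm/Function.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;

namespace {
  // Hypothetical pass showing the TD/TLI threading used throughout this patch.
  struct MyFoldPass : public FunctionPass {
    static char ID;
    MyFoldPass() : FunctionPass(ID) {}

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesAll();
      AU.addRequired<TargetLibraryInfo>();  // same requirement LazyValueInfo gains
    }

    virtual bool runOnFunction(Function &F) {
      TargetData *TD = getAnalysisIfAvailable<TargetData>();
      TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
      for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
        if (ICmpInst *IC = dyn_cast<ICmpInst>(&*I))
          if (Constant *L = dyn_cast<Constant>(IC->getOperand(0)))
            if (Constant *R = dyn_cast<Constant>(IC->getOperand(1)))
              // The folder can now use library-call knowledge via TLI.
              (void)ConstantFoldCompareInstOperands(IC->getPredicate(), L, R,
                                                    TD, TLI);
      return false;  // nothing is modified; this only exercises the folder
    }
  };
}
char MyFoldPass::ID = 0;
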
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index 38d677d..971065f 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -44,6 +44,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Pass.h"
#include "llvm/PassManager.h"
#include "llvm/IntrinsicInst.h"
@@ -103,6 +104,7 @@ namespace {
AliasAnalysis *AA;
DominatorTree *DT;
TargetData *TD;
+ TargetLibraryInfo *TLI;
std::string Messages;
raw_string_ostream MessagesStr;
@@ -117,6 +119,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetLibraryInfo>();
AU.addRequired<DominatorTree>();
}
virtual void print(raw_ostream &O, const Module *M) const {}
@@ -149,6 +152,7 @@ namespace {
char Lint::ID = 0;
INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
@@ -174,6 +178,7 @@ bool Lint::runOnFunction(Function &F) {
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<DominatorTree>();
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
visit(F);
dbgs() << MessagesStr.str();
Messages.clear();
@@ -614,10 +619,10 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
// As a last resort, try SimplifyInstruction or constant folding.
if (Instruction *Inst = dyn_cast<Instruction>(V)) {
- if (Value *W = SimplifyInstruction(Inst, TD, DT))
+ if (Value *W = SimplifyInstruction(Inst, TD, TLI, DT))
return findValueImpl(W, OffsetOk, Visited);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- if (Value *W = ConstantFoldConstantExpression(CE, TD))
+ if (Value *W = ConstantFoldConstantExpression(CE, TD, TLI))
if (W != V)
return findValueImpl(W, OffsetOk, Visited);
}
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index c7833bf..858cc64 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/Instructions.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
@@ -95,7 +96,7 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
// Test if the value is already loop-invariant.
if (isLoopInvariant(I))
return true;
- if (!I->isSafeToSpeculativelyExecute())
+ if (!isSafeToSpeculativelyExecute(I))
return false;
if (I->mayReadFromMemory())
return false;
@@ -165,99 +166,6 @@ PHINode *Loop::getCanonicalInductionVariable() const {
return 0;
}
-/// getTripCount - Return a loop-invariant LLVM value indicating the number of
-/// times the loop will be executed. Note that this means that the backedge
-/// of the loop executes N-1 times. If the trip-count cannot be determined,
-/// this returns null.
-///
-/// The IndVarSimplify pass transforms loops to have a form that this
-/// function easily understands.
-///
-Value *Loop::getTripCount() const {
- // Canonical loops will end with a 'cmp ne I, V', where I is the incremented
- // canonical induction variable and V is the trip count of the loop.
- PHINode *IV = getCanonicalInductionVariable();
- if (IV == 0 || IV->getNumIncomingValues() != 2) return 0;
-
- bool P0InLoop = contains(IV->getIncomingBlock(0));
- Value *Inc = IV->getIncomingValue(!P0InLoop);
- BasicBlock *BackedgeBlock = IV->getIncomingBlock(!P0InLoop);
-
- if (BranchInst *BI = dyn_cast<BranchInst>(BackedgeBlock->getTerminator()))
- if (BI->isConditional()) {
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
- if (ICI->getOperand(0) == Inc) {
- if (BI->getSuccessor(0) == getHeader()) {
- if (ICI->getPredicate() == ICmpInst::ICMP_NE)
- return ICI->getOperand(1);
- } else if (ICI->getPredicate() == ICmpInst::ICMP_EQ) {
- return ICI->getOperand(1);
- }
- }
- }
- }
-
- return 0;
-}
-
-/// getSmallConstantTripCount - Returns the trip count of this loop as a
-/// normal unsigned value, if possible. Returns 0 if the trip count is unknown
-/// or not constant. Will also return 0 if the trip count is very large
-/// (>= 2^32)
-unsigned Loop::getSmallConstantTripCount() const {
- Value* TripCount = this->getTripCount();
- if (TripCount) {
- if (ConstantInt *TripCountC = dyn_cast<ConstantInt>(TripCount)) {
- // Guard against huge trip counts.
- if (TripCountC->getValue().getActiveBits() <= 32) {
- return (unsigned)TripCountC->getZExtValue();
- }
- }
- }
- return 0;
-}
-
-/// getSmallConstantTripMultiple - Returns the largest constant divisor of the
-/// trip count of this loop as a normal unsigned value, if possible. This
-/// means that the actual trip count is always a multiple of the returned
-/// value (don't forget the trip count could very well be zero as well!).
-///
-/// Returns 1 if the trip count is unknown or not guaranteed to be the
-/// multiple of a constant (which is also the case if the trip count is simply
-/// constant, use getSmallConstantTripCount for that case), Will also return 1
-/// if the trip count is very large (>= 2^32).
-unsigned Loop::getSmallConstantTripMultiple() const {
- Value* TripCount = this->getTripCount();
- // This will hold the ConstantInt result, if any
- ConstantInt *Result = NULL;
- if (TripCount) {
- // See if the trip count is constant itself
- Result = dyn_cast<ConstantInt>(TripCount);
- // if not, see if it is a multiplication
- if (!Result)
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TripCount)) {
- switch (BO->getOpcode()) {
- case BinaryOperator::Mul:
- Result = dyn_cast<ConstantInt>(BO->getOperand(1));
- break;
- case BinaryOperator::Shl:
- if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1)))
- if (CI->getValue().getActiveBits() <= 5)
- return 1u << CI->getZExtValue();
- break;
- default:
- break;
- }
- }
- }
- // Guard against huge trip counts.
- if (Result && Result->getValue().getActiveBits() <= 32) {
- return (unsigned)Result->getZExtValue();
- } else {
- return 1;
- }
-}
-
/// isLCSSAForm - Return true if the Loop is in LCSSA form
bool Loop::isLCSSAForm(DominatorTree &DT) const {
// Sort the blocks vector so that we can use binary search to do quick
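
With Loop::getTripCount and its two helpers deleted above, trip-count queries presumably move to ScalarEvolution, which carries per-exit versions of these utilities. A small sketch under that assumption (the SE methods named here are believed to exist in this tree but are not shown in the patch):

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

// Sketch: ask ScalarEvolution for the constant trip count of a loop with a
// single exiting block; 0 means unknown, non-constant, or >= 2^32.
static unsigned constantTripCountOrZero(Loop *L, ScalarEvolution &SE) {
  BasicBlock *Exiting = L->getExitingBlock();  // null if several blocks exit
  if (!Exiting)
    return 0;
  return SE.getSmallConstantTripCount(L, Exiting);
}
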
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index fde07ea..22414b3 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -130,7 +130,7 @@ bool MemDepPrinter::runOnFunction(Function &F) {
AliasAnalysis::Location Loc = AA.getLocation(LI);
MDA.getNonLocalPointerDependency(Loc, true, LI->getParent(), NLDI);
} else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- if (!LI->isUnordered()) {
+ if (!SI->isUnordered()) {
// FIXME: Handle atomic/volatile stores.
Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown),
static_cast<BasicBlock *>(0)));
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index 7e22ddc..80ea219 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/Analysis/Dominators.h"
@@ -27,7 +28,7 @@ static bool CanPHITrans(Instruction *Inst) {
return true;
if (isa<CastInst>(Inst) &&
- Inst->isSafeToSpeculativelyExecute())
+ isSafeToSpeculativelyExecute(Inst))
return true;
if (Inst->getOpcode() == Instruction::Add &&
@@ -186,7 +187,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
// operands need to be phi translated, and if so, reconstruct it.
if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
- if (!Cast->isSafeToSpeculativelyExecute()) return 0;
+ if (!isSafeToSpeculativelyExecute(Cast)) return 0;
Value *PHIIn = PHITranslateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT);
if (PHIIn == 0) return 0;
if (PHIIn == Cast->getOperand(0))
@@ -284,7 +285,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
}
// See if the add simplifies away.
- if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD, DT)) {
+ if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD, TLI, DT)) {
// If we simplified the operands, the LHS is no longer an input, but Res
// is.
RemoveInstInputs(LHS, InstInputs);
@@ -381,7 +382,7 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
// Handle cast of PHI translatable value.
if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
- if (!Cast->isSafeToSpeculativelyExecute()) return 0;
+ if (!isSafeToSpeculativelyExecute(Cast)) return 0;
Value *OpVal = InsertPHITranslatedSubExpr(Cast->getOperand(0),
CurBB, PredBB, DT, NewInsts);
if (OpVal == 0) return 0;
diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp
index 379d79c..0cb1588 100644
--- a/lib/Analysis/ProfileVerifierPass.cpp
+++ b/lib/Analysis/ProfileVerifierPass.cpp
@@ -30,7 +30,7 @@ static cl::opt<bool,false>
ProfileVerifierDisableAssertions("profile-verifier-noassert",
cl::desc("Disable assertions"));
-namespace llvm {
+namespace {
template<class FType, class BType>
class ProfileVerifierPassT : public FunctionPass {
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 622b214..daf7742 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -74,6 +74,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
@@ -108,6 +109,7 @@ INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
"Scalar Evolution Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution",
"Scalar Evolution Analysis", false, true)
char ScalarEvolution::ID = 0;
@@ -188,6 +190,14 @@ void SCEV::print(raw_ostream &OS) const {
OS << OpStr;
}
OS << ")";
+ switch (NAry->getSCEVType()) {
+ case scAddExpr:
+ case scMulExpr:
+ if (NAry->getNoWrapFlags(FlagNUW))
+ OS << "<nuw>";
+ if (NAry->getNoWrapFlags(FlagNSW))
+ OS << "<nsw>";
+ }
return;
}
case scUDivExpr: {
@@ -2581,7 +2591,7 @@ const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) {
Constant *C = ConstantExpr::getSizeOf(AllocTy);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
@@ -2590,7 +2600,7 @@ const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) {
const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) {
Constant *C = ConstantExpr::getAlignOf(AllocTy);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
@@ -2607,7 +2617,7 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy,
Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
@@ -2617,7 +2627,7 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(Type *CTy,
Constant *FieldNo) {
Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
@@ -3108,7 +3118,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
// PHI's incoming blocks are in a different loop, in which case doing so
// risks breaking LCSSA form. Instcombine would normally zap these, but
// it doesn't have DominatorTree information, so it may miss cases.
- if (Value *V = SimplifyInstruction(PN, TD, DT))
+ if (Value *V = SimplifyInstruction(PN, TD, TLI, DT))
if (LI->replacementPreservesLCSSAForm(PN, V))
return getSCEV(V);
@@ -3584,6 +3594,12 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// because it leads to N-1 getAddExpr calls for N ultimate operands.
// Instead, gather up all the operands and make a single getAddExpr call.
// LLVM IR canonical form means we need only traverse the left operands.
+ //
+ // Don't apply this instruction's NSW or NUW flags to the new
+ // expression. The instruction may be guarded by control flow that the
+ // no-wrap behavior depends on. Non-control-equivalent instructions can be
+ // mapped to the same SCEV expression, and it would be incorrect to transfer
+ // NSW/NUW semantics to those operations.
SmallVector<const SCEV *, 4> AddOps;
AddOps.push_back(getSCEV(U->getOperand(1)));
for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) {
@@ -3598,16 +3614,10 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
AddOps.push_back(Op1);
}
AddOps.push_back(getSCEV(U->getOperand(0)));
- SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
- OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(V);
- if (OBO->hasNoSignedWrap())
- Flags = setFlags(Flags, SCEV::FlagNSW);
- if (OBO->hasNoUnsignedWrap())
- Flags = setFlags(Flags, SCEV::FlagNUW);
- return getAddExpr(AddOps, Flags);
+ return getAddExpr(AddOps);
}
case Instruction::Mul: {
- // See the Add code above.
+ // Don't transfer NSW/NUW for the same reason as AddExpr.
SmallVector<const SCEV *, 4> MulOps;
MulOps.push_back(getSCEV(U->getOperand(1)));
for (Value *Op = U->getOperand(0);
@@ -4762,7 +4772,8 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
/// reason, return null.
static Constant *EvaluateExpression(Value *V, const Loop *L,
DenseMap<Instruction *, Constant *> &Vals,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// Convenient constant check, but redundant for recursive calls.
if (Constant *C = dyn_cast<Constant>(V)) return C;
Instruction *I = dyn_cast<Instruction>(V);
@@ -4788,7 +4799,7 @@ static Constant *EvaluateExpression(Value *V, const Loop *L,
if (!Operands[i]) return 0;
continue;
}
- Constant *C = EvaluateExpression(Operand, L, Vals, TD);
+ Constant *C = EvaluateExpression(Operand, L, Vals, TD, TLI);
Vals[Operand] = C;
if (!C) return 0;
Operands[i] = C;
@@ -4796,12 +4807,13 @@ static Constant *EvaluateExpression(Value *V, const Loop *L,
if (CmpInst *CI = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
- Operands[1], TD);
+ Operands[1], TD, TLI);
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (!LI->isVolatile())
return ConstantFoldLoadFromConstPtr(Operands[0], TD);
}
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD);
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD,
+ TLI);
}
/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
@@ -4856,7 +4868,8 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
// Compute the value of the PHIs for the next iteration.
// EvaluateExpression adds non-phi values to the CurrentIterVals map.
DenseMap<Instruction *, Constant *> NextIterVals;
- Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD);
+ Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD,
+ TLI);
if (NextPHI == 0)
return 0; // Couldn't evaluate!
NextIterVals[PN] = NextPHI;
@@ -4881,7 +4894,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
Constant *&NextPHI = NextIterVals[PHI];
if (!NextPHI) { // Not already computed.
Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
- NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD);
+ NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI);
}
if (NextPHI != I->second)
StoppedEvolving = false;
@@ -4936,8 +4949,8 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis.
for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
ConstantInt *CondVal =
- dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L,
- CurrentIterVals, TD));
+ dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, CurrentIterVals,
+ TD, TLI));
// Couldn't symbolically evaluate.
if (!CondVal) return getCouldNotCompute();
@@ -4967,7 +4980,7 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
if (NextPHI) continue; // Already computed!
Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
- NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD);
+ NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI);
}
CurrentIterVals.swap(NextIterVals);
}
@@ -5159,13 +5172,14 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
Constant *C = 0;
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
C = ConstantFoldCompareInstOperands(CI->getPredicate(),
- Operands[0], Operands[1], TD);
+ Operands[0], Operands[1], TD,
+ TLI);
else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (!LI->isVolatile())
C = ConstantFoldLoadFromConstPtr(Operands[0], TD);
} else
C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- Operands, TD);
+ Operands, TD, TLI);
if (!C) return V;
return getSCEV(C);
}
@@ -6552,6 +6566,7 @@ bool ScalarEvolution::runOnFunction(Function &F) {
this->F = &F;
LI = &getAnalysis<LoopInfo>();
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
DT = &getAnalysis<DominatorTree>();
return false;
}
@@ -6588,6 +6603,7 @@ void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequiredTransitive<LoopInfo>();
AU.addRequiredTransitive<DominatorTree>();
+ AU.addRequired<TargetLibraryInfo>();
}
bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 47f0f32..f3cf549 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -73,9 +73,14 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
"InsertNoopCastOfTo cannot change sizes!");
// Short-circuit unnecessary bitcasts.
- if (Op == Instruction::BitCast && V->getType() == Ty)
- return V;
-
+ if (Op == Instruction::BitCast) {
+ if (V->getType() == Ty)
+ return V;
+ if (CastInst *CI = dyn_cast<CastInst>(V)) {
+ if (CI->getOperand(0)->getType() == Ty)
+ return CI->getOperand(0);
+ }
+ }
// Short-circuit unnecessary inttoptr<->ptrtoint casts.
if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
@@ -929,6 +934,36 @@ bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
}
}
+/// expandIVInc - Expand an IV increment at Builder's current InsertPos.
+/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
+/// need to materialize IV increments elsewhere to handle difficult situations.
+Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
+ Type *ExpandTy, Type *IntTy,
+ bool useSubtract) {
+ Value *IncV;
+ // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
+ if (ExpandTy->isPointerTy()) {
+ PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
+ // If the step isn't constant, don't use an implicitly scaled GEP, because
+ // that would require a multiply inside the loop.
+ if (!isa<ConstantInt>(StepV))
+ GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
+ GEPPtrTy->getAddressSpace());
+ const SCEV *const StepArray[1] = { SE.getSCEV(StepV) };
+ IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN);
+ if (IncV->getType() != PN->getType()) {
+ IncV = Builder.CreateBitCast(IncV, PN->getType());
+ rememberInstruction(IncV);
+ }
+ } else {
+ IncV = useSubtract ?
+ Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
+ Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
+ rememberInstruction(IncV);
+ }
+ return IncV;
+}
+
/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
/// the base addrec, which is the addrec without any non-loop-dominating
/// values, and return the PHI.
@@ -993,16 +1028,16 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
SE.DT->properlyDominates(cast<Instruction>(StartV)->getParent(),
L->getHeader()));
- // Expand code for the step value. Insert instructions right before the
- // terminator corresponding to the back-edge. Do this before creating the PHI
- // so that PHI reuse code doesn't see an incomplete PHI. If the stride is
- // negative, insert a sub instead of an add for the increment (unless it's a
- // constant, because subtracts of constants are canonicalized to adds).
+ // Expand code for the step value. Do this before creating the PHI so that PHI
+ // reuse code doesn't see an incomplete PHI.
const SCEV *Step = Normalized->getStepRecurrence(SE);
- bool isPointer = ExpandTy->isPointerTy();
- bool isNegative = !isPointer && isNonConstantNegative(Step);
- if (isNegative)
+ // If the stride is negative, insert a sub instead of an add for the increment
+ // (unless it's a constant, because subtracts of constants are canonicalized
+ // to adds).
+ bool useSubtract = !ExpandTy->isPointerTy() && isNonConstantNegative(Step);
+ if (useSubtract)
Step = SE.getNegativeSCEV(Step);
+ // Expand the step somewhere that dominates the loop header.
Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
// Create the PHI.
@@ -1023,33 +1058,14 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
continue;
}
- // Create a step value and add it to the PHI. If IVIncInsertLoop is
- // non-null and equal to the addrec's loop, insert the instructions
- // at IVIncInsertPos.
+ // Create a step value and add it to the PHI.
+ // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the
+ // instructions at IVIncInsertPos.
Instruction *InsertPos = L == IVIncInsertLoop ?
IVIncInsertPos : Pred->getTerminator();
Builder.SetInsertPoint(InsertPos);
- Value *IncV;
- // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
- if (isPointer) {
- PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
- // If the step isn't constant, don't use an implicitly scaled GEP, because
- // that would require a multiply inside the loop.
- if (!isa<ConstantInt>(StepV))
- GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
- GEPPtrTy->getAddressSpace());
- const SCEV *const StepArray[1] = { SE.getSCEV(StepV) };
- IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN);
- if (IncV->getType() != PN->getType()) {
- IncV = Builder.CreateBitCast(IncV, PN->getType());
- rememberInstruction(IncV);
- }
- } else {
- IncV = isNegative ?
- Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
- Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
- rememberInstruction(IncV);
- }
+ Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
+
PN->addIncoming(IncV, Pred);
}
@@ -1124,10 +1140,31 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
// For an expansion to use the postinc form, the client must call
// expandCodeFor with an InsertPoint that is either outside the PostIncLoop
// or dominated by IVIncInsertPos.
- assert((!isa<Instruction>(Result) ||
- SE.DT->dominates(cast<Instruction>(Result),
- Builder.GetInsertPoint())) &&
- "postinc expansion does not dominate use");
+ if (isa<Instruction>(Result)
+ && !SE.DT->dominates(cast<Instruction>(Result),
+ Builder.GetInsertPoint())) {
+ // The induction variable's postinc expansion does not dominate this use.
+ // IVUsers tries to prevent this case, so it is rare. However, it can
+ // happen when an IVUser outside the loop is not dominated by the latch
+ // block. Adjusting IVIncInsertPos before expansion begins cannot handle
+ // all cases. Consider a phi outside whose operand is replaced during
+ // expansion with the value of the postinc user. Without fundamentally
+ // changing the way postinc users are tracked, the only remedy is
+ // inserting an extra IV increment. StepV might fold into PostLoopOffset,
+ // but hopefully expandCodeFor handles that.
+ bool useSubtract =
+ !ExpandTy->isPointerTy() && isNonConstantNegative(Step);
+ if (useSubtract)
+ Step = SE.getNegativeSCEV(Step);
+ // Expand the step somewhere that dominates the loop header.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
+ // Restore the insertion point to the place where the caller has
+ // determined dominates all uses.
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+ Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
+ }
}
// Re-apply any non-loop-dominating scale.
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 22f1c14..ef19e06 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -63,13 +63,14 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
assert(V && "No Value?");
assert(Depth <= MaxDepth && "Limit Search Depth");
unsigned BitWidth = Mask.getBitWidth();
- assert((V->getType()->isIntOrIntVectorTy() || V->getType()->isPointerTy())
- && "Not integer or pointer type!");
+ assert((V->getType()->isIntOrIntVectorTy() ||
+ V->getType()->getScalarType()->isPointerTy()) &&
+ "Not integer or pointer type!");
assert((!TD ||
TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
(!V->getType()->isIntOrIntVectorTy() ||
V->getType()->getScalarSizeInBits() == BitWidth) &&
- KnownZero.getBitWidth() == BitWidth &&
+ KnownZero.getBitWidth() == BitWidth &&
KnownOne.getBitWidth() == BitWidth &&
"V, Mask, KnownOne and KnownZero should have same BitWidth");
@@ -103,14 +104,16 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
unsigned Align = GV->getAlignment();
if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) {
- Type *ObjectType = GV->getType()->getElementType();
- // If the object is defined in the current Module, we'll be giving
- // it the preferred alignment. Otherwise, we have to assume that it
- // may only have the minimum ABI alignment.
- if (!GV->isDeclaration() && !GV->mayBeOverridden())
- Align = TD->getPrefTypeAlignment(ObjectType);
- else
- Align = TD->getABITypeAlignment(ObjectType);
+ if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
+ Type *ObjectType = GVar->getType()->getElementType();
+ // If the object is defined in the current Module, we'll be giving
+ // it the preferred alignment. Otherwise, we have to assume that it
+ // may only have the minimum ABI alignment.
+ if (!GVar->isDeclaration() && !GVar->isWeakForLinker())
+ Align = TD->getPreferredAlignment(GVar);
+ else
+ Align = TD->getABITypeAlignment(ObjectType);
+ }
}
if (Align > 0)
KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
@@ -1367,6 +1370,8 @@ Value *llvm::isBytewiseValue(Value *V) {
return Val;
}
+
+ // FIXME: Vector types (e.g., <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>).
// Conceptually, we could handle things like:
// %a = zext i8 %X to i16
@@ -1555,7 +1560,8 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
const TargetData &TD) {
Operator *PtrOp = dyn_cast<Operator>(Ptr);
- if (PtrOp == 0) return Ptr;
+ if (PtrOp == 0 || Ptr->getType()->isVectorTy())
+ return Ptr;
// Just look through bitcasts.
if (PtrOp->getOpcode() == Instruction::BitCast)
@@ -1869,3 +1875,64 @@ bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
}
return true;
}
+
+bool llvm::isSafeToSpeculativelyExecute(const Instruction *Inst,
+ const TargetData *TD) {
+ for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
+ if (Constant *C = dyn_cast<Constant>(Inst->getOperand(i)))
+ if (C->canTrap())
+ return false;
+
+ switch (Inst->getOpcode()) {
+ default:
+ return true;
+ case Instruction::UDiv:
+ case Instruction::URem:
+ // x / y is undefined if y == 0, but calculations like x / 3 are safe.
+ return isKnownNonZero(Inst->getOperand(1), TD);
+ case Instruction::SDiv:
+ case Instruction::SRem: {
+ Value *Op = Inst->getOperand(1);
+ // x / y is undefined if y == 0
+ if (!isKnownNonZero(Op, TD))
+ return false;
+ // x / y might be undefined if y == -1
+ unsigned BitWidth = getBitWidth(Op->getType(), TD);
+ if (BitWidth == 0)
+ return false;
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ ComputeMaskedBits(Op, APInt::getAllOnesValue(BitWidth),
+ KnownZero, KnownOne, TD);
+ return !!KnownZero;
+ }
+ case Instruction::Load: {
+ const LoadInst *LI = cast<LoadInst>(Inst);
+ if (!LI->isUnordered())
+ return false;
+ return LI->getPointerOperand()->isDereferenceablePointer();
+ }
+ case Instruction::Call:
+ return false; // The called function could have undefined behavior or
+ // side-effects.
+ // FIXME: We should special-case some intrinsics (bswap,
+ // overflow-checking arithmetic, etc.)
+ case Instruction::VAArg:
+ case Instruction::Alloca:
+ case Instruction::Invoke:
+ case Instruction::PHI:
+ case Instruction::Store:
+ case Instruction::Ret:
+ case Instruction::Br:
+ case Instruction::IndirectBr:
+ case Instruction::Switch:
+ case Instruction::Unwind:
+ case Instruction::Unreachable:
+ case Instruction::Fence:
+ case Instruction::LandingPad:
+ case Instruction::AtomicRMW:
+ case Instruction::AtomicCmpXchg:
+ case Instruction::Resume:
+ return false; // Misc instructions which have effects
+ }
+}
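
isSafeToSpeculativelyExecute is now a free function in ValueTracking that optionally consults TargetData (for example, to prove a divisor non-zero), which is what the LoopInfo and PHITransAddr hunks above switch to. A minimal, hypothetical caller that mirrors the makeLoopInvariant usage:

#include "llvm/Instruction.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
using namespace llvm;

// Hypothetical hoisting guard: speculation safety is a ValueTracking query,
// no longer a method on Instruction.
static bool canHoistAboveBranch(const Instruction *I, const TargetData *TD) {
  if (!isSafeToSpeculativelyExecute(I, TD))
    return false;
  // Mirror makeLoopInvariant: stay conservative about memory even when the
  // instruction itself cannot trap.
  return !I->mayReadFromMemory() && !I->mayWriteToMemory();
}
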
diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp
index 8fcc7aa..9ef2943 100644
--- a/lib/Archive/ArchiveWriter.cpp
+++ b/lib/Archive/ArchiveWriter.cpp
@@ -182,11 +182,11 @@ Archive::addFileBefore(const sys::Path& filePath, iterator where,
if (hasSlash || filePath.str().length() > 15)
flags |= ArchiveMember::HasLongFilenameFlag;
- sys::LLVMFileType type;
+ sys::fs::file_magic type;
if (sys::fs::identify_magic(mbr->path.str(), type))
- type = sys::Unknown_FileType;
+ type = sys::fs::file_magic::unknown;
switch (type) {
- case sys::Bitcode_FileType:
+ case sys::fs::file_magic::bitcode:
flags |= ArchiveMember::BitcodeFlag;
break;
default:
diff --git a/lib/Archive/CMakeLists.txt b/lib/Archive/CMakeLists.txt
index b52974e..7ff478a 100644
--- a/lib/Archive/CMakeLists.txt
+++ b/lib/Archive/CMakeLists.txt
@@ -3,9 +3,3 @@ add_llvm_library(LLVMArchive
ArchiveReader.cpp
ArchiveWriter.cpp
)
-
-add_llvm_library_dependencies(LLVMArchive
- LLVMBitReader
- LLVMCore
- LLVMSupport
- )
diff --git a/lib/Archive/LLVMBuild.txt b/lib/Archive/LLVMBuild.txt
index 26b7c8e..d68550b 100644
--- a/lib/Archive/LLVMBuild.txt
+++ b/lib/Archive/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = Archive
parent = Libraries
required_libraries = BitReader Core Support
-
diff --git a/lib/AsmParser/CMakeLists.txt b/lib/AsmParser/CMakeLists.txt
index 7496015..985ebe2 100644
--- a/lib/AsmParser/CMakeLists.txt
+++ b/lib/AsmParser/CMakeLists.txt
@@ -4,8 +4,3 @@ add_llvm_library(LLVMAsmParser
LLParser.cpp
Parser.cpp
)
-
-add_llvm_library_dependencies(LLVMAsmParser
- LLVMCore
- LLVMSupport
- )
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index cafaab0..4678269 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -120,11 +120,6 @@ bool LLParser::ValidateEndOfModule() {
for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; )
UpgradeCallsToIntrinsic(FI++); // must be post-increment, as we remove
- // Upgrade to new EH scheme. N.B. This will go away in 3.1.
- UpgradeExceptionHandling(M);
-
- // Check debug info intrinsics.
- CheckDebugInfoIntrinsics(M);
return false;
}
@@ -1069,7 +1064,7 @@ bool LLParser::ParseInstructionMetadata(Instruction *Inst,
return TokError("expected metadata after comma");
std::string Name = Lex.getStrVal();
- unsigned MDK = M->getMDKindID(Name.c_str());
+ unsigned MDK = M->getMDKindID(Name);
Lex.Lex();
MDNode *Node;
@@ -1612,7 +1607,8 @@ bool LLParser::ParseArrayVectorType(Type *&Result, bool isVector) {
if ((unsigned)Size != Size)
return Error(SizeLoc, "size too large for vector");
if (!VectorType::isValidElementType(EltTy))
- return Error(TypeLoc, "vector element type must be fp or integer");
+ return Error(TypeLoc,
+ "vector element type must be fp, integer or a pointer to these types");
Result = VectorType::get(EltTy, unsigned(Size));
} else {
if (!ArrayType::isValidElementType(EltTy))
@@ -1971,9 +1967,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
return Error(ID.Loc, "constant vector must not be empty");
if (!Elts[0]->getType()->isIntegerTy() &&
- !Elts[0]->getType()->isFloatingPointTy())
+ !Elts[0]->getType()->isFloatingPointTy() &&
+ !Elts[0]->getType()->isPointerTy())
return Error(FirstEltLoc,
- "vector elements must have integer or floating point type");
+ "vector elements must have integer, pointer or floating point type");
// Verify that all the vector elements have the same type.
for (unsigned i = 1, e = Elts.size(); i != e; ++i)
@@ -2165,7 +2162,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
} else {
assert(Opc == Instruction::ICmp && "Unexpected opcode for CmpInst!");
if (!Val0->getType()->isIntOrIntVectorTy() &&
- !Val0->getType()->isPointerTy())
+ !Val0->getType()->getScalarType()->isPointerTy())
return Error(ID.Loc, "icmp requires pointer or integer operands");
ID.ConstantVal = ConstantExpr::getICmp(Pred, Val0, Val1);
}
@@ -2299,7 +2296,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
return true;
if (Opc == Instruction::GetElementPtr) {
- if (Elts.size() == 0 || !Elts[0]->getType()->isPointerTy())
+ if (Elts.size() == 0 ||
+ !Elts[0]->getType()->getScalarType()->isPointerTy())
return Error(ID.Loc, "getelementptr requires pointer operand");
ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end());
@@ -2953,19 +2951,11 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_tail: return ParseCall(Inst, PFS, true);
// Memory.
case lltok::kw_alloca: return ParseAlloc(Inst, PFS);
- case lltok::kw_load: return ParseLoad(Inst, PFS, false);
- case lltok::kw_store: return ParseStore(Inst, PFS, false);
+ case lltok::kw_load: return ParseLoad(Inst, PFS);
+ case lltok::kw_store: return ParseStore(Inst, PFS);
case lltok::kw_cmpxchg: return ParseCmpXchg(Inst, PFS);
case lltok::kw_atomicrmw: return ParseAtomicRMW(Inst, PFS);
case lltok::kw_fence: return ParseFence(Inst, PFS);
- case lltok::kw_volatile:
- // For compatibility; canonical location is after load
- if (EatIfPresent(lltok::kw_load))
- return ParseLoad(Inst, PFS, true);
- else if (EatIfPresent(lltok::kw_store))
- return ParseStore(Inst, PFS, true);
- else
- return TokError("expected 'load' or 'store'");
case lltok::kw_getelementptr: return ParseGetElementPtr(Inst, PFS);
case lltok::kw_extractvalue: return ParseExtractValue(Inst, PFS);
case lltok::kw_insertvalue: return ParseInsertValue(Inst, PFS);
@@ -3342,7 +3332,7 @@ bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS,
} else {
assert(Opc == Instruction::ICmp && "Unknown opcode for CmpInst!");
if (!LHS->getType()->isIntOrIntVectorTy() &&
- !LHS->getType()->isPointerTy())
+ !LHS->getType()->getScalarType()->isPointerTy())
return Error(Loc, "icmp requires integer operands");
Inst = new ICmpInst(CmpInst::Predicate(Pred), LHS, RHS);
}
@@ -3689,10 +3679,7 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) {
/// ::= 'load' 'volatile'? TypeAndValue (',' 'align' i32)?
/// ::= 'load' 'atomic' 'volatile'? TypeAndValue
/// 'singlethread'? AtomicOrdering (',' 'align' i32)?
-/// Compatibility:
-/// ::= 'volatile' 'load' TypeAndValue (',' 'align' i32)?
-int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
- bool isVolatile) {
+int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) {
Value *Val; LocTy Loc;
unsigned Alignment = 0;
bool AteExtraComma = false;
@@ -3701,15 +3688,12 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
SynchronizationScope Scope = CrossThread;
if (Lex.getKind() == lltok::kw_atomic) {
- if (isVolatile)
- return TokError("mixing atomic with old volatile placement");
isAtomic = true;
Lex.Lex();
}
+ bool isVolatile = false;
if (Lex.getKind() == lltok::kw_volatile) {
- if (isVolatile)
- return TokError("duplicate volatile before and after store");
isVolatile = true;
Lex.Lex();
}
@@ -3736,10 +3720,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
/// ::= 'store' 'volatile'? TypeAndValue ',' TypeAndValue (',' 'align' i32)?
/// ::= 'store' 'atomic' 'volatile'? TypeAndValue ',' TypeAndValue
/// 'singlethread'? AtomicOrdering (',' 'align' i32)?
-/// Compatibility:
-/// ::= 'volatile' 'store' TypeAndValue ',' TypeAndValue (',' 'align' i32)?
-int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS,
- bool isVolatile) {
+int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) {
Value *Val, *Ptr; LocTy Loc, PtrLoc;
unsigned Alignment = 0;
bool AteExtraComma = false;
@@ -3748,15 +3729,12 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS,
SynchronizationScope Scope = CrossThread;
if (Lex.getKind() == lltok::kw_atomic) {
- if (isVolatile)
- return TokError("mixing atomic with old volatile placement");
isAtomic = true;
Lex.Lex();
}
+ bool isVolatile = false;
if (Lex.getKind() == lltok::kw_volatile) {
- if (isVolatile)
- return TokError("duplicate volatile before and after store");
isVolatile = true;
Lex.Lex();
}
@@ -3902,13 +3880,15 @@ int LLParser::ParseFence(Instruction *&Inst, PerFunctionState &PFS) {
/// ParseGetElementPtr
/// ::= 'getelementptr' 'inbounds'? TypeAndValue (',' TypeAndValue)*
int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
- Value *Ptr, *Val; LocTy Loc, EltLoc;
+ Value *Ptr = 0;
+ Value *Val = 0;
+ LocTy Loc, EltLoc;
bool InBounds = EatIfPresent(lltok::kw_inbounds);
if (ParseTypeAndValue(Ptr, Loc, PFS)) return true;
- if (!Ptr->getType()->isPointerTy())
+ if (!Ptr->getType()->getScalarType()->isPointerTy())
return Error(Loc, "base of getelementptr must be a pointer");
SmallVector<Value*, 16> Indices;
@@ -3919,11 +3899,23 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
break;
}
if (ParseTypeAndValue(Val, EltLoc, PFS)) return true;
- if (!Val->getType()->isIntegerTy())
+ if (!Val->getType()->getScalarType()->isIntegerTy())
return Error(EltLoc, "getelementptr index must be an integer");
+ if (Val->getType()->isVectorTy() != Ptr->getType()->isVectorTy())
+ return Error(EltLoc, "getelementptr index type mismatch");
+ if (Val->getType()->isVectorTy()) {
+ unsigned ValNumEl = cast<VectorType>(Val->getType())->getNumElements();
+ unsigned PtrNumEl = cast<VectorType>(Ptr->getType())->getNumElements();
+ if (ValNumEl != PtrNumEl)
+ return Error(EltLoc,
+ "getelementptr vector index has a wrong number of elements");
+ }
Indices.push_back(Val);
}
+ if (Val && Val->getType()->isVectorTy() && Indices.size() != 1)
+ return Error(EltLoc, "vector getelementptrs must have a single index");
+
if (!GetElementPtrInst::getIndexedType(Ptr->getType(), Indices))
return Error(Loc, "invalid getelementptr indices");
Inst = GetElementPtrInst::Create(Ptr, Indices);
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index cbc3c23..c2537d7 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -363,8 +363,8 @@ namespace llvm {
bool ParseLandingPad(Instruction *&I, PerFunctionState &PFS);
bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail);
int ParseAlloc(Instruction *&I, PerFunctionState &PFS);
- int ParseLoad(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
- int ParseStore(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
+ int ParseLoad(Instruction *&I, PerFunctionState &PFS);
+ int ParseStore(Instruction *&I, PerFunctionState &PFS);
int ParseCmpXchg(Instruction *&I, PerFunctionState &PFS);
int ParseAtomicRMW(Instruction *&I, PerFunctionState &PFS);
int ParseFence(Instruction *&I, PerFunctionState &PFS);
diff --git a/lib/AsmParser/LLVMBuild.txt b/lib/AsmParser/LLVMBuild.txt
index ad56d4c..3bc31ed 100644
--- a/lib/AsmParser/LLVMBuild.txt
+++ b/lib/AsmParser/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = AsmParser
parent = Libraries
required_libraries = Core Support
-
diff --git a/lib/Bitcode/LLVMBuild.txt b/lib/Bitcode/LLVMBuild.txt
index 696440d..af9936b 100644
--- a/lib/Bitcode/LLVMBuild.txt
+++ b/lib/Bitcode/LLVMBuild.txt
@@ -15,8 +15,10 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = Reader Writer
+
[component_0]
type = Group
name = Bitcode
parent = Libraries
-
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 6ecdbae..d584015 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -403,14 +403,6 @@ Type *BitcodeReader::getTypeByID(unsigned ID) {
return TypeList[ID] = StructType::create(Context);
}
-/// FIXME: Remove in LLVM 3.1, only used by ParseOldTypeTable.
-Type *BitcodeReader::getTypeByIDOrNull(unsigned ID) {
- if (ID >= TypeList.size())
- TypeList.resize(ID+1);
-
- return TypeList[ID];
-}
-
//===----------------------------------------------------------------------===//
// Functions for parsing blocks from the bitcode file
@@ -747,264 +739,6 @@ bool BitcodeReader::ParseTypeTableBody() {
}
}
-// FIXME: Remove in LLVM 3.1
-bool BitcodeReader::ParseOldTypeTable() {
- if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_OLD))
- return Error("Malformed block record");
-
- if (!TypeList.empty())
- return Error("Multiple TYPE_BLOCKs found!");
-
-
- // While horrible, we have no good ordering of types in the bc file. Just
- // iteratively parse types out of the bc file in multiple passes until we get
- // them all. Do this by saving a cursor for the start of the type block.
- BitstreamCursor StartOfTypeBlockCursor(Stream);
-
- unsigned NumTypesRead = 0;
-
- SmallVector<uint64_t, 64> Record;
-RestartScan:
- unsigned NextTypeID = 0;
- bool ReadAnyTypes = false;
-
- // Read all the records for this type table.
- while (1) {
- unsigned Code = Stream.ReadCode();
- if (Code == bitc::END_BLOCK) {
- if (NextTypeID != TypeList.size())
- return Error("Invalid type forward reference in TYPE_BLOCK_ID_OLD");
-
- // If we haven't read all of the types yet, iterate again.
- if (NumTypesRead != TypeList.size()) {
- // If we didn't successfully read any types in this pass, then we must
- // have an unhandled forward reference.
- if (!ReadAnyTypes)
- return Error("Obsolete bitcode contains unhandled recursive type");
-
- Stream = StartOfTypeBlockCursor;
- goto RestartScan;
- }
-
- if (Stream.ReadBlockEnd())
- return Error("Error at end of type table block");
- return false;
- }
-
- if (Code == bitc::ENTER_SUBBLOCK) {
- // No known subblocks, always skip them.
- Stream.ReadSubBlockID();
- if (Stream.SkipBlock())
- return Error("Malformed block record");
- continue;
- }
-
- if (Code == bitc::DEFINE_ABBREV) {
- Stream.ReadAbbrevRecord();
- continue;
- }
-
- // Read a record.
- Record.clear();
- Type *ResultTy = 0;
- switch (Stream.ReadRecord(Code, Record)) {
- default: return Error("unknown type in type table");
- case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries]
- // TYPE_CODE_NUMENTRY contains a count of the number of types in the
- // type list. This allows us to reserve space.
- if (Record.size() < 1)
- return Error("Invalid TYPE_CODE_NUMENTRY record");
- TypeList.resize(Record[0]);
- continue;
- case bitc::TYPE_CODE_VOID: // VOID
- ResultTy = Type::getVoidTy(Context);
- break;
- case bitc::TYPE_CODE_FLOAT: // FLOAT
- ResultTy = Type::getFloatTy(Context);
- break;
- case bitc::TYPE_CODE_DOUBLE: // DOUBLE
- ResultTy = Type::getDoubleTy(Context);
- break;
- case bitc::TYPE_CODE_X86_FP80: // X86_FP80
- ResultTy = Type::getX86_FP80Ty(Context);
- break;
- case bitc::TYPE_CODE_FP128: // FP128
- ResultTy = Type::getFP128Ty(Context);
- break;
- case bitc::TYPE_CODE_PPC_FP128: // PPC_FP128
- ResultTy = Type::getPPC_FP128Ty(Context);
- break;
- case bitc::TYPE_CODE_LABEL: // LABEL
- ResultTy = Type::getLabelTy(Context);
- break;
- case bitc::TYPE_CODE_METADATA: // METADATA
- ResultTy = Type::getMetadataTy(Context);
- break;
- case bitc::TYPE_CODE_X86_MMX: // X86_MMX
- ResultTy = Type::getX86_MMXTy(Context);
- break;
- case bitc::TYPE_CODE_INTEGER: // INTEGER: [width]
- if (Record.size() < 1)
- return Error("Invalid Integer type record");
- ResultTy = IntegerType::get(Context, Record[0]);
- break;
- case bitc::TYPE_CODE_OPAQUE: // OPAQUE
- if (NextTypeID < TypeList.size() && TypeList[NextTypeID] == 0)
- ResultTy = StructType::create(Context);
- break;
- case bitc::TYPE_CODE_STRUCT_OLD: {// STRUCT_OLD
- if (NextTypeID >= TypeList.size()) break;
- // If we already read it, don't reprocess.
- if (TypeList[NextTypeID] &&
- !cast<StructType>(TypeList[NextTypeID])->isOpaque())
- break;
-
- // Set a type.
- if (TypeList[NextTypeID] == 0)
- TypeList[NextTypeID] = StructType::create(Context);
-
- std::vector<Type*> EltTys;
- for (unsigned i = 1, e = Record.size(); i != e; ++i) {
- if (Type *Elt = getTypeByIDOrNull(Record[i]))
- EltTys.push_back(Elt);
- else
- break;
- }
-
- if (EltTys.size() != Record.size()-1)
- break; // Not all elements are ready.
-
- cast<StructType>(TypeList[NextTypeID])->setBody(EltTys, Record[0]);
- ResultTy = TypeList[NextTypeID];
- TypeList[NextTypeID] = 0;
- break;
- }
- case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or
- // [pointee type, address space]
- if (Record.size() < 1)
- return Error("Invalid POINTER type record");
- unsigned AddressSpace = 0;
- if (Record.size() == 2)
- AddressSpace = Record[1];
- if ((ResultTy = getTypeByIDOrNull(Record[0])))
- ResultTy = PointerType::get(ResultTy, AddressSpace);
- break;
- }
- case bitc::TYPE_CODE_FUNCTION_OLD: {
- // FIXME: attrid is dead, remove it in LLVM 3.0
- // FUNCTION: [vararg, attrid, retty, paramty x N]
- if (Record.size() < 3)
- return Error("Invalid FUNCTION type record");
- std::vector<Type*> ArgTys;
- for (unsigned i = 3, e = Record.size(); i != e; ++i) {
- if (Type *Elt = getTypeByIDOrNull(Record[i]))
- ArgTys.push_back(Elt);
- else
- break;
- }
- if (ArgTys.size()+3 != Record.size())
- break; // Something was null.
- if ((ResultTy = getTypeByIDOrNull(Record[2])))
- ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
- break;
- }
- case bitc::TYPE_CODE_FUNCTION: {
- // FUNCTION: [vararg, retty, paramty x N]
- if (Record.size() < 2)
- return Error("Invalid FUNCTION type record");
- std::vector<Type*> ArgTys;
- for (unsigned i = 2, e = Record.size(); i != e; ++i) {
- if (Type *Elt = getTypeByIDOrNull(Record[i]))
- ArgTys.push_back(Elt);
- else
- break;
- }
- if (ArgTys.size()+2 != Record.size())
- break; // Something was null.
- if ((ResultTy = getTypeByIDOrNull(Record[1])))
- ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
- break;
- }
- case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty]
- if (Record.size() < 2)
- return Error("Invalid ARRAY type record");
- if ((ResultTy = getTypeByIDOrNull(Record[1])))
- ResultTy = ArrayType::get(ResultTy, Record[0]);
- break;
- case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty]
- if (Record.size() < 2)
- return Error("Invalid VECTOR type record");
- if ((ResultTy = getTypeByIDOrNull(Record[1])))
- ResultTy = VectorType::get(ResultTy, Record[0]);
- break;
- }
-
- if (NextTypeID >= TypeList.size())
- return Error("invalid TYPE table");
-
- if (ResultTy && TypeList[NextTypeID] == 0) {
- ++NumTypesRead;
- ReadAnyTypes = true;
-
- TypeList[NextTypeID] = ResultTy;
- }
-
- ++NextTypeID;
- }
-}
-
-
-bool BitcodeReader::ParseOldTypeSymbolTable() {
- if (Stream.EnterSubBlock(bitc::TYPE_SYMTAB_BLOCK_ID_OLD))
- return Error("Malformed block record");
-
- SmallVector<uint64_t, 64> Record;
-
- // Read all the records for this type table.
- std::string TypeName;
- while (1) {
- unsigned Code = Stream.ReadCode();
- if (Code == bitc::END_BLOCK) {
- if (Stream.ReadBlockEnd())
- return Error("Error at end of type symbol table block");
- return false;
- }
-
- if (Code == bitc::ENTER_SUBBLOCK) {
- // No known subblocks, always skip them.
- Stream.ReadSubBlockID();
- if (Stream.SkipBlock())
- return Error("Malformed block record");
- continue;
- }
-
- if (Code == bitc::DEFINE_ABBREV) {
- Stream.ReadAbbrevRecord();
- continue;
- }
-
- // Read a record.
- Record.clear();
- switch (Stream.ReadRecord(Code, Record)) {
- default: // Default behavior: unknown type.
- break;
- case bitc::TST_CODE_ENTRY: // TST_ENTRY: [typeid, namechar x N]
- if (ConvertToString(Record, 1, TypeName))
- return Error("Invalid TST_ENTRY record");
- unsigned TypeID = Record[0];
- if (TypeID >= TypeList.size())
- return Error("Invalid Type ID in TST_ENTRY record");
-
- // Only apply the type name to a struct type with no name.
- if (StructType *STy = dyn_cast<StructType>(TypeList[TypeID]))
- if (!STy->isLiteral() && !STy->hasName())
- STy->setName(TypeName);
- TypeName.clear();
- break;
- }
- }
-}
-
bool BitcodeReader::ParseValueSymbolTable() {
if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
return Error("Malformed block record");
@@ -1553,6 +1287,50 @@ bool BitcodeReader::ParseConstants() {
return false;
}
+bool BitcodeReader::ParseUseLists() {
+ if (Stream.EnterSubBlock(bitc::USELIST_BLOCK_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records.
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of use-list table block");
+ return false;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ // No known subblocks, always skip them.
+ Stream.ReadSubBlockID();
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a use list record.
+ Record.clear();
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: // Default behavior: unknown type.
+ break;
+ case bitc::USELIST_CODE_ENTRY: { // USELIST_CODE_ENTRY: TBD.
+ unsigned RecordLength = Record.size();
+ if (RecordLength < 1)
+ return Error ("Invalid UseList reader!");
+ UseListRecords.push_back(Record);
+ break;
+ }
+ }
+ }
+}
+
/// RememberAndSkipFunctionBody - When we see the block for a function body,
/// remember where it is and then skip it. This lets us lazily deserialize the
/// functions.
@@ -1636,14 +1414,6 @@ bool BitcodeReader::ParseModule() {
if (ParseTypeTable())
return true;
break;
- case bitc::TYPE_BLOCK_ID_OLD:
- if (ParseOldTypeTable())
- return true;
- break;
- case bitc::TYPE_SYMTAB_BLOCK_ID_OLD:
- if (ParseOldTypeSymbolTable())
- return true;
- break;
case bitc::VALUE_SYMTAB_BLOCK_ID:
if (ParseValueSymbolTable())
return true;
@@ -1667,6 +1437,10 @@ bool BitcodeReader::ParseModule() {
if (RememberAndSkipFunctionBody())
return true;
break;
+ case bitc::USELIST_BLOCK_ID:
+ if (ParseUseLists())
+ return true;
+ break;
}
continue;
}
@@ -2975,12 +2749,6 @@ bool BitcodeReader::MaterializeModule(Module *M, std::string *ErrInfo) {
}
std::vector<std::pair<Function*, Function*> >().swap(UpgradedIntrinsics);
- // Upgrade to new EH scheme. N.B. This will go away in 3.1.
- UpgradeExceptionHandling(M);
-
- // Check debug info intrinsics.
- CheckDebugInfoIntrinsics(TheModule);
-
return false;
}
@@ -3026,6 +2794,9 @@ Module *llvm::ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context,
return 0;
}
+ // TODO: Restore the use-lists to the in-memory state when the bitcode was
+ // written. We must defer until the Module has been fully materialized.
+
return M;
}
diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h
index 6e6118c..978b15b 100644
--- a/lib/Bitcode/Reader/BitcodeReader.h
+++ b/lib/Bitcode/Reader/BitcodeReader.h
@@ -135,6 +135,7 @@ class BitcodeReader : public GVMaterializer {
BitcodeReaderValueList ValueList;
BitcodeReaderMDValueList MDValueList;
SmallVector<Instruction *, 64> InstructionList;
+ SmallVector<SmallVector<uint64_t, 64>, 64> UseListRecords;
std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits;
@@ -211,7 +212,6 @@ public:
bool ParseTriple(std::string &Triple);
private:
Type *getTypeByID(unsigned ID);
- Type *getTypeByIDOrNull(unsigned ID);
Value *getFnValueByID(unsigned ID, Type *Ty) {
if (Ty && Ty->isMetadataTy())
return MDValueList.getValueFwdRef(ID);
@@ -259,10 +259,8 @@ private:
bool ParseModule();
bool ParseAttributeBlock();
bool ParseTypeTable();
- bool ParseOldTypeTable(); // FIXME: Remove in LLVM 3.1
bool ParseTypeTableBody();
- bool ParseOldTypeSymbolTable(); // FIXME: Remove in LLVM 3.1
bool ParseValueSymbolTable();
bool ParseConstants();
bool RememberAndSkipFunctionBody();
@@ -271,6 +269,7 @@ private:
bool ParseMetadata();
bool ParseMetadataAttachment();
bool ParseModuleTriple(std::string &Triple);
+ bool ParseUseLists();
};
} // End llvm namespace
diff --git a/lib/Bitcode/Reader/CMakeLists.txt b/lib/Bitcode/Reader/CMakeLists.txt
index 37bebc4..693d431 100644
--- a/lib/Bitcode/Reader/CMakeLists.txt
+++ b/lib/Bitcode/Reader/CMakeLists.txt
@@ -2,8 +2,3 @@ add_llvm_library(LLVMBitReader
BitReader.cpp
BitcodeReader.cpp
)
-
-add_llvm_library_dependencies(LLVMBitReader
- LLVMCore
- LLVMSupport
- )
diff --git a/lib/Bitcode/Reader/LLVMBuild.txt b/lib/Bitcode/Reader/LLVMBuild.txt
index 948b335..c85a87b 100644
--- a/lib/Bitcode/Reader/LLVMBuild.txt
+++ b/lib/Bitcode/Reader/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = BitReader
parent = Bitcode
required_libraries = Core Support
-
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index e758f94..d980163 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -23,6 +23,7 @@
#include "llvm/Operator.h"
#include "llvm/ValueSymbolTable.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -31,6 +32,12 @@
#include <map>
using namespace llvm;
+static cl::opt<bool>
+EnablePreserveUseListOrdering("enable-bc-uselist-preserve",
+ cl::desc("Turn on experimental support for "
+ "use-list order preservation."),
+ cl::init(false), cl::Hidden);
+
/// These are manifest constants used by the bitcode writer. They do not need to
/// be kept in sync with the reader, but need to be consistent within this file.
enum {
@@ -194,11 +201,12 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Stream.EnterSubblock(bitc::TYPE_BLOCK_ID_NEW, 4 /*count from # abbrevs */);
SmallVector<uint64_t, 64> TypeVals;
+ uint64_t NumBits = Log2_32_Ceil(VE.getTypes().size()+1);
+
// Abbrev for TYPE_CODE_POINTER.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_POINTER));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
- Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
Abbv->Add(BitCodeAbbrevOp(0)); // Addrspace = 0
unsigned PtrAbbrev = Stream.EmitAbbrev(Abbv);
@@ -207,8 +215,8 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_FUNCTION));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isvararg
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
- Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+
unsigned FunctionAbbrev = Stream.EmitAbbrev(Abbv);
// Abbrev for TYPE_CODE_STRUCT_ANON.
@@ -216,8 +224,8 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_ANON));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
- Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+
unsigned StructAnonAbbrev = Stream.EmitAbbrev(Abbv);
// Abbrev for TYPE_CODE_STRUCT_NAME.
@@ -232,16 +240,16 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAMED));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
- Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+
unsigned StructNamedAbbrev = Stream.EmitAbbrev(Abbv);
// Abbrev for TYPE_CODE_ARRAY.
Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // size
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
- Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+
unsigned ArrayAbbrev = Stream.EmitAbbrev(Abbv);
// Emit an entry count so the reader can reserve space.
@@ -497,8 +505,8 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
// Emit the function proto information.
for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
- // FUNCTION: [type, callingconv, isproto, paramattr,
- // linkage, alignment, section, visibility, gc, unnamed_addr]
+ // FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment,
+ // section, visibility, gc, unnamed_addr]
Vals.push_back(VE.getTypeID(F->getType()));
Vals.push_back(F->getCallingConv());
Vals.push_back(F->isDeclaration());
@@ -518,6 +526,7 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
// Emit the alias information.
for (Module::const_alias_iterator AI = M->alias_begin(), E = M->alias_end();
AI != E; ++AI) {
+ // ALIAS: [alias type, aliasee val#, linkage, visibility]
Vals.push_back(VE.getTypeID(AI->getType()));
Vals.push_back(VE.getValueID(AI->getAliasee()));
Vals.push_back(getEncodedLinkage(AI));
@@ -1571,6 +1580,102 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Stream.ExitBlock();
}
+// Sort the Users based on the order in which the reader parses the bitcode
+// file.
+static bool bitcodereader_order(const User *lhs, const User *rhs) {
+ // TODO: Implement.
+ return true;
+}
+
+static void WriteUseList(const Value *V, const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+
+ // One or zero uses can't get out of order.
+ if (V->use_empty() || V->hasNUses(1))
+ return;
+
+ // Make a copy of the in-memory use-list for sorting.
+ unsigned UseListSize = std::distance(V->use_begin(), V->use_end());
+ SmallVector<const User*, 8> UseList;
+ UseList.reserve(UseListSize);
+ for (Value::const_use_iterator I = V->use_begin(), E = V->use_end();
+ I != E; ++I) {
+ const User *U = *I;
+ UseList.push_back(U);
+ }
+
+ // Sort the copy based on the order read by the BitcodeReader.
+ std::sort(UseList.begin(), UseList.end(), bitcodereader_order);
+
+ // TODO: Generate a diff between the BitcodeWriter in-memory use-list and the
+ // sorted list (i.e., the expected BitcodeReader in-memory use-list).
+
+ // TODO: Emit the USELIST_CODE_ENTRYs.
+}
+
+static void WriteFunctionUseList(const Function *F, ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ VE.incorporateFunction(*F);
+
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI)
+ WriteUseList(AI, VE, Stream);
+ for (Function::const_iterator BB = F->begin(), FE = F->end(); BB != FE;
+ ++BB) {
+ WriteUseList(BB, VE, Stream);
+ for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
+ ++II) {
+ WriteUseList(II, VE, Stream);
+ for (User::const_op_iterator OI = II->op_begin(), E = II->op_end();
+ OI != E; ++OI) {
+ if ((isa<Constant>(*OI) && !isa<GlobalValue>(*OI)) ||
+ isa<InlineAsm>(*OI))
+ WriteUseList(*OI, VE, Stream);
+ }
+ }
+ }
+ VE.purgeFunction();
+}
+
+// Emit use-lists.
+static void WriteModuleUseLists(const Module *M, ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ Stream.EnterSubblock(bitc::USELIST_BLOCK_ID, 3);
+
+ // XXX: this modifies the module, but in a way that should never change the
+ // behavior of any pass or codegen in LLVM. The problem is that GVs may
+ // contain entries in the use_list that do not exist in the Module and are
+ // not stored in the .bc file.
+ for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I)
+ I->removeDeadConstantUsers();
+
+ // Write the global variables.
+ for (Module::const_global_iterator GI = M->global_begin(),
+ GE = M->global_end(); GI != GE; ++GI) {
+ WriteUseList(GI, VE, Stream);
+
+ // Write the global variable initializers.
+ if (GI->hasInitializer())
+ WriteUseList(GI->getInitializer(), VE, Stream);
+ }
+
+ // Write the functions.
+ for (Module::const_iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) {
+ WriteUseList(FI, VE, Stream);
+ if (!FI->isDeclaration())
+ WriteFunctionUseList(FI, VE, Stream);
+ }
+
+ // Write the aliases.
+ for (Module::const_alias_iterator AI = M->alias_begin(), AE = M->alias_end();
+ AI != AE; ++AI) {
+ WriteUseList(AI, VE, Stream);
+ WriteUseList(AI->getAliasee(), VE, Stream);
+ }
+
+ Stream.ExitBlock();
+}
/// WriteModule - Emit the specified module to the bitstream.
static void WriteModule(const Module *M, BitstreamWriter &Stream) {
@@ -1616,6 +1721,10 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) {
// Emit names for globals/functions etc.
WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream);
+ // Emit use-lists.
+ if (EnablePreserveUseListOrdering)
+ WriteModuleUseLists(M, VE, Stream);
+
Stream.ExitBlock();
}
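
Note: the use-list machinery added above is still experimental. bitcodereader_order unconditionally returns true (which is not a valid strict weak ordering for std::sort), and both the diff computation and the USELIST_CODE_ENTRY emission are left as TODOs. One plausible way to finish the predicate, assuming the writer's ValueEnumerator is made visible to it and that every user being sorted has already been enumerated, is to order users by their ValueEnumerator IDs, since those IDs track the order in which the reader recreates values. A minimal sketch, not part of this patch:

    // Sketch only: order users by the IDs the writer's ValueEnumerator
    // assigned, as a stand-in for the order in which BitcodeReader will
    // parse them.  Assumes every User in the list has been enumerated.
    struct BitcodeReaderOrder {
      const ValueEnumerator &VE;
      explicit BitcodeReaderOrder(const ValueEnumerator &VE) : VE(VE) {}
      bool operator()(const User *LHS, const User *RHS) const {
        return VE.getValueID(LHS) < VE.getValueID(RHS);
      }
    };

    // Hypothetical use inside WriteUseList, which already receives VE:
    //   std::sort(UseList.begin(), UseList.end(), BitcodeReaderOrder(VE));
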
diff --git a/lib/Bitcode/Writer/CMakeLists.txt b/lib/Bitcode/Writer/CMakeLists.txt
index 3cf9056..f097b09 100644
--- a/lib/Bitcode/Writer/CMakeLists.txt
+++ b/lib/Bitcode/Writer/CMakeLists.txt
@@ -4,8 +4,3 @@ add_llvm_library(LLVMBitWriter
BitcodeWriterPass.cpp
ValueEnumerator.cpp
)
-
-add_llvm_library_dependencies(LLVMBitWriter
- LLVMCore
- LLVMSupport
- )
diff --git a/lib/Bitcode/Writer/LLVMBuild.txt b/lib/Bitcode/Writer/LLVMBuild.txt
index 39ff04e..7d9e1de 100644
--- a/lib/Bitcode/Writer/LLVMBuild.txt
+++ b/lib/Bitcode/Writer/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = BitWriter
parent = Bitcode
required_libraries = Core Support
-
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index 9ae9905..1c4d670 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -19,6 +19,8 @@
#include "llvm/Module.h"
#include "llvm/ValueSymbolTable.h"
#include "llvm/Instructions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
@@ -107,7 +109,6 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
OptimizeConstants(FirstConstant, Values.size());
}
-
unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const {
InstructionMapType::const_iterator I = InstructionMap.find(Inst);
assert(I != InstructionMap.end() && "Instruction is not mapped!");
@@ -130,6 +131,43 @@ unsigned ValueEnumerator::getValueID(const Value *V) const {
return I->second-1;
}
+void ValueEnumerator::dump() const {
+ print(dbgs(), ValueMap, "Default");
+ dbgs() << '\n';
+ print(dbgs(), MDValueMap, "MetaData");
+ dbgs() << '\n';
+}
+
+void ValueEnumerator::print(raw_ostream &OS, const ValueMapType &Map,
+ const char *Name) const {
+
+ OS << "Map Name: " << Name << "\n";
+ OS << "Size: " << Map.size() << "\n";
+ for (ValueMapType::const_iterator I = Map.begin(),
+ E = Map.end(); I != E; ++I) {
+
+ const Value *V = I->first;
+ if (V->hasName())
+ OS << "Value: " << V->getName();
+ else
+ OS << "Value: [null]\n";
+ V->dump();
+
+ OS << " Uses(" << std::distance(V->use_begin(),V->use_end()) << "):";
+ for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ if (UI != V->use_begin())
+ OS << ",";
+ if((*UI)->hasName())
+ OS << " " << (*UI)->getName();
+ else
+ OS << " [null]";
+
+ }
+ OS << "\n\n";
+ }
+}
+
// Optimize constant ordering.
namespace {
struct CstSortPredicate {
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index b6fc920..a6ca536 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -32,6 +32,7 @@ class NamedMDNode;
class AttrListPtr;
class ValueSymbolTable;
class MDSymbolTable;
+class raw_ostream;
class ValueEnumerator {
public:
@@ -83,6 +84,9 @@ private:
public:
ValueEnumerator(const Module *M);
+ void dump() const;
+ void print(raw_ostream &OS, const ValueMapType &Map, const char *Name) const;
+
unsigned getValueID(const Value *V) const;
unsigned getTypeID(Type *T) const {
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 25842a7..6cf4571 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -148,7 +148,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
assert(State == NULL);
State = new AggressiveAntiDepState(TRI->getNumRegs(), BB);
- bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
+ bool IsReturnBlock = (!BB->empty() && BB->back().isReturn());
std::vector<unsigned> &KillIndices = State->GetKillIndices();
std::vector<unsigned> &DefIndices = State->GetDefIndices();
@@ -384,7 +384,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
// If MI's defs have a special allocation requirement, don't allow
// any def registers to be changed. Also assume all registers
// defined in a call must not be changed (ABI).
- if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() ||
+ if (MI->isCall() || MI->hasExtraDefRegAllocReq() ||
TII->isPredicated(MI)) {
DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
State->UnionGroups(Reg, 0);
@@ -451,8 +451,8 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
// instruction which may not be executed. The second R6 def may or may not
// re-define R6 so it's not safe to change it since the last R6 use cannot be
// changed.
- bool Special = MI->getDesc().isCall() ||
- MI->getDesc().hasExtraSrcRegAllocReq() ||
+ bool Special = MI->isCall() ||
+ MI->hasExtraSrcRegAllocReq() ||
TII->isPredicated(MI);
// Scan the register uses for this instruction and update
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index fafc010..0c84be5 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -1,4 +1,4 @@
-//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities --*- C++ ------*-===//
+//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
@@ -149,33 +150,40 @@ llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos,
/// consideration of global floating-point math flags.
///
ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) {
- ISD::CondCode FPC, FOC;
switch (Pred) {
- case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break;
- case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break;
- case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break;
- case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break;
- case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break;
- case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break;
- case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break;
- case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break;
- case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break;
- case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break;
- case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break;
- case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break;
- case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break;
- case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break;
- case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
- case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break;
- default:
- llvm_unreachable("Invalid FCmp predicate opcode!");
- FOC = FPC = ISD::SETFALSE;
- break;
+ case FCmpInst::FCMP_FALSE: return ISD::SETFALSE;
+ case FCmpInst::FCMP_OEQ: return ISD::SETOEQ;
+ case FCmpInst::FCMP_OGT: return ISD::SETOGT;
+ case FCmpInst::FCMP_OGE: return ISD::SETOGE;
+ case FCmpInst::FCMP_OLT: return ISD::SETOLT;
+ case FCmpInst::FCMP_OLE: return ISD::SETOLE;
+ case FCmpInst::FCMP_ONE: return ISD::SETONE;
+ case FCmpInst::FCMP_ORD: return ISD::SETO;
+ case FCmpInst::FCMP_UNO: return ISD::SETUO;
+ case FCmpInst::FCMP_UEQ: return ISD::SETUEQ;
+ case FCmpInst::FCMP_UGT: return ISD::SETUGT;
+ case FCmpInst::FCMP_UGE: return ISD::SETUGE;
+ case FCmpInst::FCMP_ULT: return ISD::SETULT;
+ case FCmpInst::FCMP_ULE: return ISD::SETULE;
+ case FCmpInst::FCMP_UNE: return ISD::SETUNE;
+ case FCmpInst::FCMP_TRUE: return ISD::SETTRUE;
+ default: break;
+ }
+ llvm_unreachable("Invalid FCmp predicate opcode!");
+ return ISD::SETFALSE;
+}
+
+ISD::CondCode llvm::getFCmpCodeWithoutNaN(ISD::CondCode CC) {
+ switch (CC) {
+ case ISD::SETOEQ: case ISD::SETUEQ: return ISD::SETEQ;
+ case ISD::SETONE: case ISD::SETUNE: return ISD::SETNE;
+ case ISD::SETOLT: case ISD::SETULT: return ISD::SETLT;
+ case ISD::SETOLE: case ISD::SETULE: return ISD::SETLE;
+ case ISD::SETOGT: case ISD::SETUGT: return ISD::SETGT;
+ case ISD::SETOGE: case ISD::SETUGE: return ISD::SETGE;
+ default: break;
}
- if (NoNaNsFPMath)
- return FOC;
- else
- return FPC;
+ return CC;
}
/// getICmpCondCode - Return the ISD condition code corresponding to
@@ -221,12 +229,13 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
// longjmp on x86), it can end up causing miscompilation that has not
// been fully understood.
if (!Ret &&
- (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false;
+ (!TLI.getTargetMachine().Options.GuaranteedTailCallOpt ||
+ !isa<UnreachableInst>(Term))) return false;
// If I will have a chain, make sure no other instruction that will have a
// chain interposes between I and the return.
if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
- !I->isSafeToSpeculativelyExecute())
+ !isSafeToSpeculativelyExecute(I))
for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
--BBI) {
if (&*BBI == I)
@@ -235,7 +244,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
if (isa<DbgInfoIntrinsic>(BBI))
continue;
if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
- !BBI->isSafeToSpeculativelyExecute())
+ !isSafeToSpeculativelyExecute(BBI))
return false;
}
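
Note: the getFCmpCondCode rewrite above drops the old FOC/FPC pair and its hidden dependence on the NoNaNsFPMath global. The function now always returns the precise ordered/unordered code, and callers that may ignore NaNs are expected to relax it themselves through the new getFCmpCodeWithoutNaN. A minimal sketch of that expected call pattern (the Options-based check is an assumption about the caller, mirroring the TargetOptions migration elsewhere in this patch, not something this hunk adds):

    // Assumed caller-side pattern in instruction selection.
    ISD::CondCode Condition = getFCmpCondCode(FC->getPredicate());
    if (TM.Options.NoNaNsFPMath)   // relax only when NaNs can be ignored
      Condition = getFCmpCodeWithoutNaN(Condition);
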
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 711b796..0c4d0d5 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1308,7 +1308,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List) {
}
// Emit the function pointers in reverse priority order.
- switch (MAI->getStructorOutputOrder()) {
+ switch (getObjFileLowering().getStructorOutputOrder()) {
case Structors::None:
break;
case Structors::PriorityOrder:
@@ -1659,6 +1659,28 @@ static void EmitGlobalConstantVector(const ConstantVector *CV,
AP.OutStreamer.EmitZeros(Padding, AddrSpace);
}
+static void LowerVectorConstant(const Constant *CV, unsigned AddrSpace,
+ AsmPrinter &AP) {
+ // Look through bitcasts
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV))
+ if (CE->getOpcode() == Instruction::BitCast)
+ CV = CE->getOperand(0);
+
+ if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
+ return EmitGlobalConstantVector(V, AddrSpace, AP);
+
+ // If we get here, we're stuck; report the problem to the user.
+ // FIXME: Are there any other useful tricks for vectors?
+ {
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported vector expression in static initializer: ";
+ WriteAsOperand(OS, CV, /*PrintType=*/false,
+ !AP.MF ? 0 : AP.MF->getFunction()->getParent());
+ report_fatal_error(OS.str());
+ }
+}
+
static void EmitGlobalConstantStruct(const ConstantStruct *CS,
unsigned AddrSpace, AsmPrinter &AP) {
// Print the fields in successive locations. Pad to align if needed!
@@ -1813,8 +1835,8 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
return;
}
- if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
- return EmitGlobalConstantVector(V, AddrSpace, AP);
+ if (CV->getType()->isVectorTy())
+ return LowerVectorConstant(CV, AddrSpace, AP);
// Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
// thread the streamer with EmitValue.
@@ -1987,7 +2009,7 @@ static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB,
void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
// Emit an alignment directive for this block, if needed.
if (unsigned Align = MBB->getAlignment())
- EmitAlignment(Log2_32(Align));
+ EmitAlignment(Align);
// If the block has its address taken, emit any labels that were used to
// reference the block. It is possible that there is more than one label
@@ -2082,7 +2104,7 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
MachineInstr &MI = *II;
// If it is not a simple branch, we are in a table somewhere.
- if (!MI.getDesc().isBranch() || MI.getDesc().isIndirectBranch())
+ if (!MI.isBranch() || MI.isIndirectBranch())
return false;
// If we are the operands of one of the branches, this is not
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index f6ce17d..58fe2ed 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -12,13 +12,3 @@ add_llvm_library(LLVMAsmPrinter
OcamlGCPrinter.cpp
Win64Exception.cpp
)
-
-add_llvm_library_dependencies(LLVMAsmPrinter
- LLVMAnalysis
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMMCParser
- LLVMSupport
- LLVMTarget
- )
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 237998a..8cb5156 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -791,13 +791,13 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
DISubprogram SP(Element);
ElemDie = getOrCreateSubprogramDIE(DISubprogram(Element));
if (SP.isProtected())
- addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_protected);
else if (SP.isPrivate())
- addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
else
- addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
if (SP.isExplicit())
addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1);
@@ -988,7 +988,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
unsigned VK = SP.getVirtuality();
if (VK) {
- addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK);
+ addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK);
DIEBlock *Block = getDIEBlock();
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex());
@@ -1398,17 +1398,17 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
if (DT.isProtected())
- addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_protected);
else if (DT.isPrivate())
- addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
// Otherwise C++ member and base classes are considered public.
else
- addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
if (DT.isVirtual())
- addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag,
+ addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1,
dwarf::DW_VIRTUALITY_virtual);
// Objective-C properties.
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index dc46a58..a3db96a 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -442,6 +442,10 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID());
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
+ // Add name to the name table; we do this here because we're guaranteed
+ // to have concrete versions of our DW_TAG_inlined_subprogram nodes.
+ addSubprogramNames(TheCU, InlinedSP, ScopeDIE);
+
return ScopeDIE;
}
@@ -1414,7 +1418,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope);
- if (!DisableFramePointerElim(*MF))
+ if (!MF->getTarget().Options.DisableFramePointerElim(*MF))
TheCU->addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr,
dwarf::DW_FORM_flag, 1);
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index e0f2e85..bf7f7ee 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -184,7 +184,7 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
/// CallToNoUnwindFunction - Return `true' if this is a call to a function
/// marked `nounwind'. Return `false' otherwise.
bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
- assert(MI->getDesc().isCall() && "This should be a call instruction!");
+ assert(MI->isCall() && "This should be a call instruction!");
bool MarkedNoUnwind = false;
bool SawFunc = false;
@@ -243,7 +243,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
MI != E; ++MI) {
if (!MI->isLabel()) {
- if (MI->getDesc().isCall())
+ if (MI->isCall())
SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI);
continue;
}
diff --git a/lib/CodeGen/AsmPrinter/LLVMBuild.txt b/lib/CodeGen/AsmPrinter/LLVMBuild.txt
index 0f2059f..20b1f7b 100644
--- a/lib/CodeGen/AsmPrinter/LLVMBuild.txt
+++ b/lib/CodeGen/AsmPrinter/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = AsmPrinter
parent = Libraries
required_libraries = Analysis CodeGen Core MC MCParser Support Target
-
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 5dec368..89894c3 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -432,10 +432,9 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
for (; I != E; ++I) {
if (I->isDebugValue())
continue;
- const MCInstrDesc &MCID = I->getDesc();
- if (MCID.isCall())
+ if (I->isCall())
Time += 10;
- else if (MCID.mayLoad() || MCID.mayStore())
+ else if (I->mayLoad() || I->mayStore())
Time += 2;
else
++Time;
@@ -502,7 +501,7 @@ static unsigned CountTerminators(MachineBasicBlock *MBB,
break;
}
--I;
- if (!I->getDesc().isTerminator()) break;
+ if (!I->isTerminator()) break;
++NumTerms;
}
return NumTerms;
@@ -550,8 +549,8 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1,
// heuristics.
unsigned EffectiveTailLen = CommonTailLen;
if (SuccBB && MBB1 != PredBB && MBB2 != PredBB &&
- !MBB1->back().getDesc().isBarrier() &&
- !MBB2->back().getDesc().isBarrier())
+ !MBB1->back().isBarrier() &&
+ !MBB2->back().isBarrier())
++EffectiveTailLen;
// Check if the common tail is long enough to be worthwhile.
@@ -927,8 +926,9 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
if (MergePotentials.size() >= 2)
MadeChange |= TryTailMergeBlocks(IBB, PredBB);
// Reinsert an unconditional branch if needed.
- // The 1 below can occur as a result of removing blocks in TryTailMergeBlocks.
- PredBB = prior(I); // this may have been changed in TryTailMergeBlocks
+ // The 1 below can occur as a result of removing blocks in
+ // TryTailMergeBlocks.
+ PredBB = prior(I); // this may have been changed in TryTailMergeBlocks
if (MergePotentials.size() == 1 &&
MergePotentials.begin()->getBlock() != PredBB)
FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
@@ -983,7 +983,7 @@ static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) {
if (!MBBI->isDebugValue())
break;
}
- return (MBBI->getDesc().isBranch());
+ return (MBBI->isBranch());
}
/// IsBetterFallthrough - Return true if it would be clearly better to
@@ -1011,7 +1011,7 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
MachineBasicBlock::iterator MBB2I = --MBB2->end();
while (MBB2I->isDebugValue())
--MBB2I;
- return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall();
+ return MBB2I->isCall() && !MBB1I->isCall();
}
/// OptimizeBlock - Analyze and optimize control flow related to the specified
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index c8d4dcf..7aee3bb 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_library(LLVMCodeGen
CodePlacementOpt.cpp
CriticalAntiDepBreaker.cpp
DeadMachineInstructionElim.cpp
+ DFAPacketizer.cpp
DwarfEHPrepare.cpp
EdgeBundles.cpp
ELFCodeEmitter.cpp
@@ -46,6 +47,7 @@ add_llvm_library(LLVMCodeGen
MachineFunctionPass.cpp
MachineFunctionPrinterPass.cpp
MachineInstr.cpp
+ MachineInstrBundle.cpp
MachineLICM.cpp
MachineLoopInfo.cpp
MachineLoopRanges.cpp
@@ -87,27 +89,18 @@ add_llvm_library(LLVMCodeGen
Spiller.cpp
SpillPlacement.cpp
SplitKit.cpp
- Splitter.cpp
StackProtector.cpp
StackSlotColoring.cpp
StrongPHIElimination.cpp
TailDuplication.cpp
+ TargetFrameLoweringImpl.cpp
TargetInstrInfoImpl.cpp
TargetLoweringObjectFileImpl.cpp
+ TargetOptionsImpl.cpp
TwoAddressInstructionPass.cpp
UnreachableBlockElim.cpp
VirtRegMap.cpp
)
-add_llvm_library_dependencies(LLVMCodeGen
- LLVMAnalysis
- LLVMCore
- LLVMMC
- LLVMScalarOpts
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- )
-
add_subdirectory(SelectionDAG)
add_subdirectory(AsmPrinter)
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 3112c22..48b71d9 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -45,7 +45,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeRegisterCoalescerPass(Registry);
initializeRenderMachineFunctionPass(Registry);
initializeSlotIndexesPass(Registry);
- initializeLoopSplitterPass(Registry);
initializeStackProtectorPass(Registry);
initializeStackSlotColoringPass(Registry);
initializeStrongPHIEliminationPass(Registry);
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 84c4d59..128143e 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -54,7 +54,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// Clear "do not change" set.
KeepRegs.clear();
- bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
+ bool IsReturnBlock = (!BB->empty() && BB->back().isReturn());
// Determine the live-out physregs for this block.
if (IsReturnBlock) {
@@ -193,8 +193,8 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
// instruction which may not be executed. The second R6 def may or may not
// re-define R6 so it's not safe to change it since the last R6 use cannot be
// changed.
- bool Special = MI->getDesc().isCall() ||
- MI->getDesc().hasExtraSrcRegAllocReq() ||
+ bool Special = MI->isCall() ||
+ MI->hasExtraSrcRegAllocReq() ||
TII->isPredicated(MI);
// Scan the register operands for this instruction and update
@@ -572,7 +572,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// If MI's defs have a special allocation requirement, don't allow
// any def registers to be changed. Also assume all registers
// defined in a call must not be changed (ABI).
- if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() ||
+ if (MI->isCall() || MI->hasExtraDefRegAllocReq() ||
TII->isPredicated(MI))
// If this instruction's defs have special allocation requirement, don't
// break this anti-dependency.
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
new file mode 100644
index 0000000..16276bd
--- /dev/null
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -0,0 +1,98 @@
+//=- llvm/CodeGen/DFAPacketizer.cpp - DFA Packetizer for VLIW -*- C++ -*-=====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This class implements a deterministic finite automaton (DFA) based
+// packetizing mechanism for VLIW architectures. It provides APIs to
+// determine whether there exists a legal mapping of instructions to
+// functional unit assignments in a packet. The DFA is auto-generated from
+// the target's Schedule.td file.
+//
+// A DFA consists of 3 major elements: states, inputs, and transitions. For
+// the packetizing mechanism, the input is the set of instruction classes for
+// a target. The state models all possible combinations of functional unit
+// consumption for a given set of instructions in a packet. A transition
+// models the addition of an instruction to a packet. In the DFA constructed
+// by this class, if an instruction can be added to a packet, then a valid
+// transition exists from the corresponding state. Invalid transitions
+// indicate that the instruction cannot be added to the current packet.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCInstrItineraries.h"
+using namespace llvm;
+
+DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2],
+ const unsigned *SET):
+ InstrItins(I), CurrentState(0), DFAStateInputTable(SIT),
+ DFAStateEntryTable(SET) {}
+
+
+//
+// ReadTable - Read the DFA transition table and update CachedTable.
+//
+// Format of the transition tables:
+// DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid
+// transitions
+// DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable
+// for the ith state
+//
+void DFAPacketizer::ReadTable(unsigned int state) {
+ unsigned ThisState = DFAStateEntryTable[state];
+ unsigned NextStateInTable = DFAStateEntryTable[state+1];
+ // Early exit in case CachedTable already contains this
+ // state's transitions.
+ if (CachedTable.count(UnsignPair(state,
+ DFAStateInputTable[ThisState][0])))
+ return;
+
+ for (unsigned i = ThisState; i < NextStateInTable; i++)
+ CachedTable[UnsignPair(state, DFAStateInputTable[i][0])] =
+ DFAStateInputTable[i][1];
+}
+
+
+// canReserveResources - Check if the resources occupied by a MCInstrDesc
+// are available in the current state.
+bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) {
+ unsigned InsnClass = MID->getSchedClass();
+ const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass);
+ unsigned FuncUnits = IS->getUnits();
+ UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits);
+ ReadTable(CurrentState);
+ return (CachedTable.count(StateTrans) != 0);
+}
+
+
+// reserveResources - Reserve the resources occupied by a MCInstrDesc and
+// change the current state to reflect that change.
+void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) {
+ unsigned InsnClass = MID->getSchedClass();
+ const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass);
+ unsigned FuncUnits = IS->getUnits();
+ UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits);
+ ReadTable(CurrentState);
+ assert(CachedTable.count(StateTrans) != 0);
+ CurrentState = CachedTable[StateTrans];
+}
+
+
+// canReserveResources - Check if the resources occupied by a machine
+// instruction are available in the current state.
+bool DFAPacketizer::canReserveResources(llvm::MachineInstr *MI) {
+ const llvm::MCInstrDesc &MID = MI->getDesc();
+ return canReserveResources(&MID);
+}
+
+// reserveResources - Reserve the resources occupied by a machine
+// instruction and change the current state to reflect that change.
+void DFAPacketizer::reserveResources(llvm::MachineInstr *MI) {
+ const llvm::MCInstrDesc &MID = MI->getDesc();
+ reserveResources(&MID);
+}
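
Note: the new DFAPacketizer only answers the question "does this instruction still fit in the current packet?"; forming packets is left to a target-specific driver. A greedy driver would query canReserveResources before each instruction and reserve on success, closing the packet otherwise. A rough sketch, assuming the target constructs the packetizer from its generated tables and resets its state between packets (neither is shown in this file):

    // Sketch of a greedy packet-formation loop (assumptions noted above).
    void formPackets(DFAPacketizer &DFA, MachineBasicBlock &MBB) {
      SmallVector<MachineInstr*, 8> CurrentPacket;
      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
           I != E; ++I) {
        MachineInstr *MI = &*I;
        if (DFA.canReserveResources(MI)) {
          // A legal functional-unit assignment exists: add to the packet.
          DFA.reserveResources(MI);
          CurrentPacket.push_back(MI);
        } else {
          // No legal transition: end the packet here and start a new one.
          // (Returning the DFA to its start state is the driver's job.)
          CurrentPacket.clear();
        }
      }
    }
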
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 6de6c0c..ba135e1 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -102,7 +102,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
LivePhysRegs = ReservedRegs;
// Also add any explicit live-out physregs for this block.
- if (!MBB->empty() && MBB->back().getDesc().isReturn())
+ if (!MBB->empty() && MBB->back().isReturn())
for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(),
LOE = MRI->liveout_end(); LOI != LOE; ++LOI) {
unsigned Reg = *LOI;
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index 300f037..4ec75cd 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -454,7 +454,7 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
assert(!MI->isDebugValue() && "Won't process debug values");
const MCInstrDesc &MCID = MI->getDesc();
for (unsigned i = 0,
- e = MCID.isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
+ e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg())
diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp
index a67140e..b5f107d 100644
--- a/lib/CodeGen/ExpandISelPseudos.cpp
+++ b/lib/CodeGen/ExpandISelPseudos.cpp
@@ -62,8 +62,7 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *MI = MBBI++;
// If MI is a pseudo, expand it.
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.usesCustomInsertionHook()) {
+ if (MI->usesCustomInsertionHook()) {
Changed = true;
MachineBasicBlock *NewMBB =
TLI->EmitInstrWithCustomInserter(MI, MBB);
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index e2a14a8..3d23db0 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -207,7 +207,7 @@ bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) {
++mi;
// Only expand pseudos.
- if (!MI->getDesc().isPseudo())
+ if (!MI->isPseudo())
continue;
// Give targets a chance to expand even standard pseudos.
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index 9349797..e2c7132 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -386,7 +386,7 @@ void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
BBE = MF.end(); BBI != BBE; ++BBI)
for (MachineBasicBlock::iterator MI = BBI->begin(),
ME = BBI->end(); MI != ME; ++MI)
- if (MI->getDesc().isCall())
+ if (MI->isCall())
VisitCallPoint(MI);
}
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index d888939..bd31fdf 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -573,12 +573,12 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
// blocks, move the end iterators up past any branch instructions.
while (TIE != TIB) {
--TIE;
- if (!TIE->getDesc().isBranch())
+ if (!TIE->isBranch())
break;
}
while (FIE != FIB) {
--FIE;
- if (!FIE->getDesc().isBranch())
+ if (!FIE->isBranch())
break;
}
@@ -651,12 +651,11 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
if (I->isDebugValue())
continue;
- const MCInstrDesc &MCID = I->getDesc();
- if (MCID.isNotDuplicable())
+ if (I->isNotDuplicable())
BBI.CannotBeCopied = true;
bool isPredicated = TII->isPredicated(I);
- bool isCondBr = BBI.IsBrAnalyzable && MCID.isConditionalBranch();
+ bool isCondBr = BBI.IsBrAnalyzable && I->isConditionalBranch();
if (!isCondBr) {
if (!isPredicated) {
@@ -1395,9 +1394,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
for (MachineBasicBlock::iterator I = FromBBI.BB->begin(),
E = FromBBI.BB->end(); I != E; ++I) {
- const MCInstrDesc &MCID = I->getDesc();
// Do not copy the end of the block branches.
- if (IgnoreBr && MCID.isBranch())
+ if (IgnoreBr && I->isBranch())
break;
MachineInstr *MI = MF.CloneMachineInstr(I);
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 59907d9..9bf810e 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -759,7 +759,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
// Find all spills and copies of VNI.
for (MachineRegisterInfo::use_nodbg_iterator UI = MRI.use_nodbg_begin(Reg);
MachineInstr *MI = UI.skipInstruction();) {
- if (!MI->isCopy() && !MI->getDesc().mayStore())
+ if (!MI->isCopy() && !MI->mayStore())
continue;
SlotIndex Idx = LIS.getInstructionIndex(MI);
if (LI->getVNInfoAt(Idx) != VNI)
@@ -878,7 +878,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
// Before rematerializing into a register for a single instruction, try to
// fold a load into the instruction. That avoids allocating a new register.
- if (RM.OrigMI->getDesc().canFoldAsLoad() &&
+ if (RM.OrigMI->canFoldAsLoad() &&
foldMemoryOperand(MI, Ops, RM.OrigMI)) {
Edit->markRematerialized(RM.ParentVNI);
++NumFoldedLoads;
@@ -957,7 +957,7 @@ void InlineSpiller::reMaterializeAll() {
if (DeadDefs.empty())
return;
DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n");
- Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII);
+ Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII, RegsToSpill);
// Get rid of deleted and empty intervals.
for (unsigned i = RegsToSpill.size(); i != 0; --i) {
@@ -1240,7 +1240,7 @@ void InlineSpiller::spillAll() {
// Hoisted spills may cause dead code.
if (!DeadDefs.empty()) {
DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n");
- Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII);
+ Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII, RegsToSpill);
}
// Finally delete the SnippetCopies.
diff --git a/lib/CodeGen/LLVMBuild.txt b/lib/CodeGen/LLVMBuild.txt
index 2eebb08..fee0347 100644
--- a/lib/CodeGen/LLVMBuild.txt
+++ b/lib/CodeGen/LLVMBuild.txt
@@ -15,9 +15,11 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = AsmPrinter SelectionDAG
+
[component_0]
type = Library
name = CodeGen
parent = Libraries
required_libraries = Analysis Core MC Scalar Support Target TransformUtils
-
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 03b5693..62227fd 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -41,10 +41,6 @@
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-namespace llvm {
- bool EnableFastISel;
-}
-
static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden,
cl::desc("Disable Post Regalloc"));
static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
@@ -114,9 +110,10 @@ EnableFastISelOption("fast-isel", cl::Hidden,
LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
StringRef CPU, StringRef FS,
+ TargetOptions Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : TargetMachine(T, Triple, CPU, FS) {
+ : TargetMachine(T, Triple, CPU, FS, Options) {
CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL);
AsmInfo = T.createMCAsmInfo(Triple);
// TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0,
@@ -275,14 +272,15 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
return false; // success!
}
-static void printNoVerify(PassManagerBase &PM, const char *Banner) {
- if (PrintMachineCode)
+void LLVMTargetMachine::printNoVerify(PassManagerBase &PM,
+ const char *Banner) const {
+ if (Options.PrintMachineCode)
PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
}
-static void printAndVerify(PassManagerBase &PM,
- const char *Banner) {
- if (PrintMachineCode)
+void LLVMTargetMachine::printAndVerify(PassManagerBase &PM,
+ const char *Banner) const {
+ if (Options.PrintMachineCode)
PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
if (VerifyMachineCode)
@@ -380,7 +378,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
if (EnableFastISelOption == cl::BOU_TRUE ||
(getOptLevel() == CodeGenOpt::None &&
EnableFastISelOption != cl::BOU_FALSE))
- EnableFastISel = true;
+ Options.EnableFastISel = true;
// Ask the target for an isel.
if (addInstSelector(PM))
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index eb54baa7..c35302a 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -920,8 +920,8 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx,
}
// Don't insert anything after the first terminator, though.
- return MI->getDesc().isTerminator() ? MBB->getFirstTerminator() :
- llvm::next(MachineBasicBlock::iterator(MI));
+ return MI->isTerminator() ? MBB->getFirstTerminator() :
+ llvm::next(MachineBasicBlock::iterator(MI));
}
DebugLoc UserValue::findDebugLoc() {
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index edcfebe..1e58173 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -794,7 +794,7 @@ LiveIntervals::getLastSplitPoint(const LiveInterval &li,
MachineBasicBlock::iterator I = mbb->end(), B = mbb->begin();
while (I != B) {
--I;
- if (I->getDesc().isCall())
+ if (I->isCall())
return I;
}
// The block contains no calls that can throw, so use the first terminator.
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 2f283b2..a470877 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -129,7 +129,7 @@ bool LiveRangeEdit::canRematerializeAt(Remat &RM,
}
// If only cheap remats were requested, bail out early.
- if (cheapAsAMove && !RM.OrigMI->getDesc().isAsCheapAsAMove())
+ if (cheapAsAMove && !RM.OrigMI->isAsCheapAsAMove())
return false;
// Verify that all used registers are available with the same values.
@@ -174,7 +174,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
if (MO.isDef()) {
if (DefMI && DefMI != MI)
return false;
- if (!MI->getDesc().canFoldAsLoad())
+ if (!MI->canFoldAsLoad())
return false;
DefMI = MI;
} else if (!MO.isUndef()) {
@@ -210,7 +210,8 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
LiveIntervals &LIS, VirtRegMap &VRM,
- const TargetInstrInfo &TII) {
+ const TargetInstrInfo &TII,
+ ArrayRef<unsigned> RegsBeingSpilled) {
SetVector<LiveInterval*,
SmallVector<LiveInterval*, 8>,
SmallPtrSet<LiveInterval*, 8> > ToShrink;
@@ -290,6 +291,21 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
delegate_->LRE_WillShrinkVirtReg(LI->reg);
if (!LIS.shrinkToUses(LI, &Dead))
continue;
+
+ // Don't create new intervals for a register being spilled.
+ // The new intervals would have to be spilled anyway, so it's not worth it.
+ // Also they currently aren't spilled so creating them and not spilling
+ // them results in incorrect code.
+ bool BeingSpilled = false;
+ for (unsigned i = 0, e = RegsBeingSpilled.size(); i != e; ++i) {
+ if (LI->reg == RegsBeingSpilled[i]) {
+ BeingSpilled = true;
+ break;
+ }
+ }
+
+ if (BeingSpilled) continue;
+
// LI may have been separated, create new intervals.
LI->RenumberValues(LIS);
diff --git a/lib/CodeGen/LiveRangeEdit.h b/lib/CodeGen/LiveRangeEdit.h
index 9b0a671..057d9bb 100644
--- a/lib/CodeGen/LiveRangeEdit.h
+++ b/lib/CodeGen/LiveRangeEdit.h
@@ -191,9 +191,14 @@ public:
/// eliminateDeadDefs - Try to delete machine instructions that are now dead
/// (allDefsAreDead returns true). This may cause live intervals to be trimmed
/// and further dead defs to be eliminated.
+ /// RegsBeingSpilled lists registers currently being spilled by the register
+ /// allocator. These registers should not be split into new intervals
+ /// as currently those new intervals are not guaranteed to spill.
void eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
LiveIntervals&, VirtRegMap&,
- const TargetInstrInfo&);
+ const TargetInstrInfo&,
+ ArrayRef<unsigned> RegsBeingSpilled
+ = ArrayRef<unsigned>());
/// calculateRegClassAndHint - Recompute register class and hint for each new
/// register.
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 2ca90f9..7477d91 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -590,8 +590,8 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
// them. The tail callee need not take the same registers as input
// that it produces as output, and there are dependencies for its input
// registers elsewhere.
- if (!MBB->empty() && MBB->back().getDesc().isReturn()
- && !MBB->back().getDesc().isCall()) {
+ if (!MBB->empty() && MBB->back().isReturn()
+ && !MBB->back().isCall()) {
MachineInstr *Ret = &MBB->back();
for (MachineRegisterInfo::liveout_iterator
@@ -754,7 +754,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB,
const unsigned NumNew = BB->getNumber();
// All registers used by PHI nodes in SuccBB must be live through BB.
- for (MachineBasicBlock::const_iterator BBI = SuccBB->begin(),
+ for (MachineBasicBlock::iterator BBI = SuccBB->begin(),
BBE = SuccBB->end(); BBI != BBE && BBI->isPHI(); ++BBI)
for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
if (BBI->getOperand(i+1).getMBB() == BB)
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index b9d1ef7..6734916 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -73,7 +73,8 @@ void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) {
// Make sure the instructions have their operands in the reginfo lists.
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- for (MachineBasicBlock::iterator I = N->begin(), E = N->end(); I != E; ++I)
+ for (MachineBasicBlock::instr_iterator
+ I = N->instr_begin(), E = N->instr_end(); I != E; ++I)
I->AddRegOperandsToUseLists(RegInfo);
LeakDetector::removeGarbageObject(N);
@@ -120,8 +121,8 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
/// lists.
void ilist_traits<MachineInstr>::
transferNodesFromList(ilist_traits<MachineInstr> &fromList,
- MachineBasicBlock::iterator first,
- MachineBasicBlock::iterator last) {
+ ilist_iterator<MachineInstr> first,
+ ilist_iterator<MachineInstr> last) {
assert(Parent->getParent() == fromList.Parent->getParent() &&
"MachineInstr parent mismatch!");
@@ -140,9 +141,10 @@ void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) {
}
MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
- iterator I = begin();
+ instr_iterator I = instr_begin();
while (I != end() && I->isPHI())
++I;
+ assert(!I->isInsideBundle() && "First non-phi MI cannot be inside a bundle!");
return I;
}
@@ -150,23 +152,63 @@ MachineBasicBlock::iterator
MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
while (I != end() && (I->isPHI() || I->isLabel() || I->isDebugValue()))
++I;
+ // FIXME: This needs to change if we wish to bundle labels / dbg_values
+ // inside the bundle.
+ assert(!I->isInsideBundle() &&
+ "First non-phi / non-label instruction is inside a bundle!");
return I;
}
MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
iterator I = end();
- while (I != begin() && ((--I)->getDesc().isTerminator() || I->isDebugValue()))
+ while (I != begin() && ((--I)->isTerminator() || I->isDebugValue()))
; /*noop */
- while (I != end() && !I->getDesc().isTerminator())
+ while (I != end() && !I->isTerminator())
+ ++I;
+ return I;
+}
+
+MachineBasicBlock::const_iterator
+MachineBasicBlock::getFirstTerminator() const {
+ const_iterator I = end();
+ while (I != begin() && ((--I)->isTerminator() || I->isDebugValue()))
+ ; /*noop */
+ while (I != end() && !I->isTerminator())
+ ++I;
+ return I;
+}
+
+MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() {
+ instr_iterator I = instr_end();
+ while (I != instr_begin() && ((--I)->isTerminator() || I->isDebugValue()))
+ ; /*noop */
+ while (I != instr_end() && !I->isTerminator())
++I;
return I;
}
MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() {
- iterator B = begin(), I = end();
+ // Skip over end-of-block dbg_value instructions.
+ instr_iterator B = instr_begin(), I = instr_end();
while (I != B) {
--I;
- if (I->isDebugValue())
+ // Return instruction that starts a bundle.
+ if (I->isDebugValue() || I->isInsideBundle())
+ continue;
+ return I;
+ }
+ // The block is all debug values.
+ return end();
+}
+
+MachineBasicBlock::const_iterator
+MachineBasicBlock::getLastNonDebugInstr() const {
+ // Skip over end-of-block dbg_value instructions.
+ const_instr_iterator B = instr_begin(), I = instr_end();
+ while (I != B) {
+ --I;
+ // Return instruction that starts a bundle.
+ if (I->isDebugValue() || I->isInsideBundle())
continue;
return I;
}
@@ -203,8 +245,6 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
return;
}
- if (Alignment) { OS << "Alignment " << Alignment << "\n"; }
-
if (Indexes)
OS << Indexes->getMBBStartIdx(this) << '\t';
@@ -218,6 +258,12 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
}
if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; }
if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; }
+ if (Alignment) {
+ OS << Comma << "Align " << Alignment << " (" << (1u << Alignment)
+ << " bytes)";
+ Comma = ", ";
+ }
+
OS << '\n';
const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
@@ -237,13 +283,15 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
OS << '\n';
}
- for (const_iterator I = begin(); I != end(); ++I) {
+ for (const_instr_iterator I = instr_begin(); I != instr_end(); ++I) {
if (Indexes) {
if (Indexes->hasIndex(I))
OS << Indexes->getInstructionIndex(I);
OS << '\t';
}
OS << '\t';
+ if (I->isInsideBundle())
+ OS << " * ";
I->print(OS, &getParent()->getTarget());
}
@@ -449,8 +497,8 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
fromMBB->removeSuccessor(Succ);
// Fix up any PHI nodes in the successor.
- for (MachineBasicBlock::iterator MI = Succ->begin(), ME = Succ->end();
- MI != ME && MI->isPHI(); ++MI)
+ for (MachineBasicBlock::instr_iterator MI = Succ->instr_begin(),
+ ME = Succ->instr_end(); MI != ME && MI->isPHI(); ++MI)
for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) {
MachineOperand &MO = MI->getOperand(i);
if (MO.getMBB() == fromMBB)
@@ -492,8 +540,8 @@ bool MachineBasicBlock::canFallThrough() {
// Barrier is predicated and thus no longer an actual control barrier. This
// is over-conservative though, because if an instruction isn't actually
// predicated we could still treat it like a barrier.
- return empty() || !back().getDesc().isBarrier() ||
- back().getDesc().isPredicable();
+ return empty() || !back().isBarrier() ||
+ back().isPredicable();
}
// If there is no branch, control always falls through.
@@ -552,7 +600,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
// Collect a list of virtual registers killed by the terminators.
SmallVector<unsigned, 4> KilledRegs;
if (LV)
- for (iterator I = getFirstTerminator(), E = end(); I != E; ++I) {
+ for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
+ I != E; ++I) {
MachineInstr *MI = I;
for (MachineInstr::mop_iterator OI = MI->operands_begin(),
OE = MI->operands_end(); OI != OE; ++OI) {
@@ -579,7 +628,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
}
// Fix PHI nodes in Succ so they refer to NMBB instead of this
- for (MachineBasicBlock::iterator i = Succ->begin(), e = Succ->end();
+ for (MachineBasicBlock::instr_iterator
+ i = Succ->instr_begin(),e = Succ->instr_end();
i != e && i->isPHI(); ++i)
for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
if (i->getOperand(ni+1).getMBB() == this)
@@ -595,7 +645,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
// Restore kills of virtual registers that were killed by the terminators.
while (!KilledRegs.empty()) {
unsigned Reg = KilledRegs.pop_back_val();
- for (iterator I = end(), E = begin(); I != E;) {
+ for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) {
if (!(--I)->addRegisterKilled(Reg, NULL, /* addIfNotFound= */ false))
continue;
LV->getVarInfo(Reg).Kills.push_back(I);
@@ -664,6 +714,41 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
return NMBB;
}
+MachineBasicBlock::iterator
+MachineBasicBlock::erase(MachineBasicBlock::iterator I) {
+ if (I->isBundle()) {
+ MachineBasicBlock::iterator E = llvm::next(I);
+ return Insts.erase(I.getInstrIterator(), E.getInstrIterator());
+ }
+
+ return Insts.erase(I.getInstrIterator());
+}
+
+MachineInstr *MachineBasicBlock::remove(MachineInstr *I) {
+ if (I->isBundle()) {
+ MachineBasicBlock::instr_iterator MII = I; ++MII;
+ while (MII != end() && MII->isInsideBundle()) {
+ MachineInstr *MI = &*MII++;
+ Insts.remove(MI);
+ }
+ }
+
+ return Insts.remove(I);
+}
+
+void MachineBasicBlock::splice(MachineBasicBlock::iterator where,
+ MachineBasicBlock *Other,
+ MachineBasicBlock::iterator From) {
+ if (From->isBundle()) {
+ MachineBasicBlock::iterator To = llvm::next(From);
+ Insts.splice(where.getInstrIterator(), Other->Insts,
+ From.getInstrIterator(), To.getInstrIterator());
+ return;
+ }
+
+ Insts.splice(where.getInstrIterator(), Other->Insts, From.getInstrIterator());
+}
+
/// removeFromParent - This method unlinks 'this' from the containing function,
/// and returns it, but does not delete it.
MachineBasicBlock *MachineBasicBlock::removeFromParent() {
@@ -687,10 +772,10 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
MachineBasicBlock *New) {
assert(Old != New && "Cannot replace self with self!");
- MachineBasicBlock::iterator I = end();
- while (I != begin()) {
+ MachineBasicBlock::instr_iterator I = instr_end();
+ while (I != instr_begin()) {
--I;
- if (!I->getDesc().isTerminator()) break;
+ if (!I->isTerminator()) break;
// Scan the operands of this machine instruction, replacing any uses of Old
// with New.
@@ -769,17 +854,17 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
/// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping
/// any DBG_VALUE instructions. Return UnknownLoc if there is none.
DebugLoc
-MachineBasicBlock::findDebugLoc(MachineBasicBlock::iterator &MBBI) {
+MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
DebugLoc DL;
- MachineBasicBlock::iterator E = end();
- if (MBBI != E) {
- // Skip debug declarations, we don't want a DebugLoc from them.
- MachineBasicBlock::iterator MBBI2 = MBBI;
- while (MBBI2 != E && MBBI2->isDebugValue())
- MBBI2++;
- if (MBBI2 != E)
- DL = MBBI2->getDebugLoc();
- }
+ instr_iterator E = instr_end();
+ if (MBBI == E)
+ return DL;
+
+ // Skip debug declarations, we don't want a DebugLoc from them.
+ while (MBBI != E && MBBI->isDebugValue())
+ MBBI++;
+ if (MBBI != E)
+ DL = MBBI->getDebugLoc();
return DL;
}
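The erase/remove/splice overloads above rely on the new two-level iteration scheme: the plain MachineBasicBlock::iterator treats a finalized bundle as a single position (which is why erase can turn llvm::next(I) into the end of the bundle), while instr_iterator still visits every MachineInstr, bundled or not, as used by the kill-flag and PHI fix-ups in SplitCriticalEdge. A minimal sketch of the two traversals, assuming MBB may contain finalized bundles (the function name and loop bodies are illustrative only):

    void walkBothViews(MachineBasicBlock &MBB) {
      // Bundle-level view: one step per BUNDLE header or unbundled instruction.
      // Calling MBB.erase(I) on a BUNDLE header also removes its contents.
      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
           I != E; ++I) {
        // ... per-bundle work ...
      }

      // Instruction-level view: every MachineInstr, including those marked
      // InsideBundle, matching the loops SplitCriticalEdge now uses.
      for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(),
             E = MBB.instr_end(); I != E; ++I) {
        // ... per-instruction work ...
      }
    }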
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 55d804b..638d895 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -36,10 +36,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -56,22 +53,6 @@ STATISTIC(UncondBranchTakenFreq,
"Potential frequency of taking unconditional branches");
namespace {
-/// \brief A structure for storing a weighted edge.
-///
-/// This stores an edge and its weight, computed as the product of the
-/// frequency that the starting block is entered with the probability of
-/// a particular exit block.
-struct WeightedEdge {
- BlockFrequency EdgeFrequency;
- MachineBasicBlock *From, *To;
-
- bool operator<(const WeightedEdge &RHS) const {
- return EdgeFrequency < RHS.EdgeFrequency;
- }
-};
-}
-
-namespace {
class BlockChain;
/// \brief Type for our function-wide basic block -> block chain mapping.
typedef DenseMap<MachineBasicBlock *, BlockChain *> BlockToChainMapType;
@@ -222,6 +203,9 @@ class MachineBlockPlacement : public MachineFunctionPass {
void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter = 0);
+ MachineBasicBlock *findBestLoopTop(MachineFunction &F,
+ MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet);
void buildLoopChains(MachineFunction &F, MachineLoop &L);
void buildCFGChains(MachineFunction &F);
void AlignLoops(MachineFunction &F);
@@ -546,12 +530,134 @@ void MachineBlockPlacement::buildChain(
markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter);
Chain.merge(BestSucc, &SuccChain);
BB = *llvm::prior(Chain.end());
- };
+ }
DEBUG(dbgs() << "Finished forming chain for header block "
<< getBlockNum(*Chain.begin()) << "\n");
}
+/// \brief Find the best loop top block for layout.
+///
+/// This routine implements the logic to analyze the loop looking for the best
+/// block to lay out at the top of the loop. Typically this is done to maximize
+/// fallthrough opportunities.
+MachineBasicBlock *
+MachineBlockPlacement::findBestLoopTop(MachineFunction &F,
+ MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet) {
+ BlockFrequency BestExitEdgeFreq;
+ MachineBasicBlock *ExitingBB = 0;
+ MachineBasicBlock *LoopingBB = 0;
+ // If there are exits to outer loops, loop rotation can severely limit
+ // fallthrough opportunites unless it selects such an exit. Keep a set of
+ // blocks where rotating to exit with that block will reach an outer loop.
+ SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
+
+ DEBUG(dbgs() << "Finding best loop exit for: "
+ << getBlockName(L.getHeader()) << "\n");
+ for (MachineLoop::block_iterator I = L.block_begin(),
+ E = L.block_end();
+ I != E; ++I) {
+ BlockChain &Chain = *BlockToChain[*I];
+ // Ensure that this block is at the end of a chain; otherwise it could be
+ // mid-way through an inner loop or a successor of an analyzable branch.
+ if (*I != *llvm::prior(Chain.end()))
+ continue;
+
+ // Now walk the successors. We need to establish whether this has a viable
+ // exiting successor and whether it has a viable non-exiting successor.
+ // We store the old exiting state and restore it if a viable looping
+ // successor isn't found.
+ MachineBasicBlock *OldExitingBB = ExitingBB;
+ BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
+ // We also compute and store the best looping successor for use in layout.
+ MachineBasicBlock *BestLoopSucc = 0;
+ // FIXME: Due to the performance of the probability and weight routines in
+ // the MBPI analysis, we use the internal weights. This is only valid
+ // because it is purely a ranking function; we don't care about anything
+ // but the relative values.
+ uint32_t BestLoopSuccWeight = 0;
+ // FIXME: We also manually compute the probabilities to avoid quadratic
+ // behavior.
+ uint32_t WeightScale = 0;
+ uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale);
+ for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(),
+ SE = (*I)->succ_end();
+ SI != SE; ++SI) {
+ if ((*SI)->isLandingPad())
+ continue;
+ if (*SI == *I)
+ continue;
+ BlockChain &SuccChain = *BlockToChain[*SI];
+ // Don't split chains, either this chain or the successor's chain.
+ if (&Chain == &SuccChain || *SI != *SuccChain.begin()) {
+ DEBUG(dbgs() << " " << (LoopBlockSet.count(*SI) ? "looping: "
+ : "exiting: ")
+ << getBlockName(*I) << " -> "
+ << getBlockName(*SI) << " (chain conflict)\n");
+ continue;
+ }
+
+ uint32_t SuccWeight = MBPI->getEdgeWeight(*I, *SI);
+ if (LoopBlockSet.count(*SI)) {
+ DEBUG(dbgs() << " looping: " << getBlockName(*I) << " -> "
+ << getBlockName(*SI) << " (" << SuccWeight << ")\n");
+ if (BestLoopSucc && BestLoopSuccWeight >= SuccWeight)
+ continue;
+
+ BestLoopSucc = *SI;
+ BestLoopSuccWeight = SuccWeight;
+ continue;
+ }
+
+ BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
+ BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb;
+ DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> "
+ << getBlockName(*SI) << " (" << ExitEdgeFreq << ")\n");
+ // Note that we slightly bias this toward an existing layout successor to
+ // retain incoming order in the absence of better information.
+ // FIXME: Should we bias this more strongly? It's pretty weak.
+ if (!ExitingBB || ExitEdgeFreq > BestExitEdgeFreq ||
+ ((*I)->isLayoutSuccessor(*SI) &&
+ !(ExitEdgeFreq < BestExitEdgeFreq))) {
+ BestExitEdgeFreq = ExitEdgeFreq;
+ ExitingBB = *I;
+ }
+
+ if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI))
+ if (ExitLoop->contains(&L))
+ BlocksExitingToOuterLoop.insert(*I);
+ }
+
+ // Restore the old exiting state; no viable looping successor was found.
+ if (!BestLoopSucc) {
+ ExitingBB = OldExitingBB;
+ BestExitEdgeFreq = OldBestExitEdgeFreq;
+ continue;
+ }
+
+ // If this was the best exiting block thus far, also record the looping block.
+ if (ExitingBB == *I)
+ LoopingBB = BestLoopSucc;
+ }
+ // Without a candidate exiting block or with only a single block in the
+ // loop, just use the loop header to lay out the loop.
+ if (!ExitingBB || L.getNumBlocks() == 1)
+ return L.getHeader();
+
+ // Also, if we have exit blocks which lead to outer loops but didn't select
+ // one of them as the exiting block we are rotating toward, disable loop
+ // rotation altogether.
+ if (!BlocksExitingToOuterLoop.empty() &&
+ !BlocksExitingToOuterLoop.count(ExitingBB))
+ return L.getHeader();
+
+ assert(LoopingBB && "All successors of a loop block are exit blocks!");
+ DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n");
+ DEBUG(dbgs() << " Best top block: " << getBlockName(LoopingBB) << "\n");
+ return LoopingBB;
+}
+
/// \brief Forms basic block chains from the natural loop structures.
///
/// These chains are designed to preserve the existing *structure* of the code
@@ -567,17 +673,21 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
SmallVector<MachineBasicBlock *, 16> BlockWorkList;
BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end());
- BlockChain &LoopChain = *BlockToChain[L.getHeader()];
+
+ MachineBasicBlock *LayoutTop = findBestLoopTop(F, L, LoopBlockSet);
+ BlockChain &LoopChain = *BlockToChain[LayoutTop];
// FIXME: This is a really lame way of walking the chains in the loop: we
// walk the blocks, and use a set to prevent visiting a particular chain
// twice.
SmallPtrSet<BlockChain *, 4> UpdatedPreds;
+ assert(LoopChain.LoopPredecessors == 0);
+ UpdatedPreds.insert(&LoopChain);
for (MachineLoop::block_iterator BI = L.block_begin(),
BE = L.block_end();
BI != BE; ++BI) {
BlockChain &Chain = *BlockToChain[*BI];
- if (!UpdatedPreds.insert(&Chain) || BI == L.block_begin())
+ if (!UpdatedPreds.insert(&Chain))
continue;
assert(Chain.LoopPredecessors == 0);
@@ -597,7 +707,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
BlockWorkList.push_back(*Chain.begin());
}
- buildChain(*L.block_begin(), LoopChain, BlockWorkList, &LoopBlockSet);
+ buildChain(LayoutTop, LoopChain, BlockWorkList, &LoopBlockSet);
DEBUG({
// Crash at the end so we get all of the debugging output first.
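findBestLoopTop ranks candidate exiting edges by frequency, rebuilding the branch probability from raw MBPI weights to avoid the quadratic cost mentioned in the FIXME. The arithmetic, pulled out into a standalone helper as a sketch (the helper name is made up; the calls mirror the loop above, and MBFI/MBPI are the pass's block-frequency and branch-probability analyses):

    static BlockFrequency exitEdgeFreq(MachineBlockFrequencyInfo *MBFI,
                                       MachineBranchProbabilityInfo *MBPI,
                                       MachineBasicBlock *BB,
                                       MachineBasicBlock *Succ) {
      // Sum of the outgoing edge weights of BB, together with the scale the
      // sum was computed at; both come back from one getSumForBlock call.
      uint32_t WeightScale = 0;
      uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale);
      // Probability of taking BB -> Succ, weighted by how often BB executes.
      uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ);
      BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
      return MBFI->getBlockFreq(BB) * SuccProb;
    }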
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 7eda8c1..8c02cd7 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -260,12 +260,11 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
return false;
// Ignore stuff that we obviously can't move.
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.mayStore() || MCID.isCall() || MCID.isTerminator() ||
+ if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
MI->hasUnmodeledSideEffects())
return false;
- if (MCID.mayLoad()) {
+ if (MI->mayLoad()) {
// Okay, this instruction does a load. As a refinement, we allow the target
// to decide whether the loaded value is actually a constant. If so, we can
// actually use it as a load.
@@ -287,7 +286,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
// Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
// an immediate predecessor. We don't want to increase register pressure and
// end up causing other computation to be spilled.
- if (MI->getDesc().isAsCheapAsAMove()) {
+ if (MI->isAsCheapAsAMove()) {
MachineBasicBlock *CSBB = CSMI->getParent();
MachineBasicBlock *BB = MI->getParent();
if (CSBB != BB && !CSBB->isSuccessor(BB))
@@ -376,7 +375,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// Commute commutable instructions.
bool Commuted = false;
- if (!FoundCSE && MI->getDesc().isCommutable()) {
+ if (!FoundCSE && MI->isCommutable()) {
MachineInstr *NewMI = TII->commuteInstruction(MI);
if (NewMI) {
Commuted = true;
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index b0ef9d4..ec5a1cd 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -178,6 +178,7 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
IsKill = isKill;
IsDead = isDead;
IsUndef = isUndef;
+ IsInternalRead = false;
IsEarlyClobber = false;
IsDebug = isDebug;
SubReg = 0;
@@ -240,7 +241,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << PrintReg(getReg(), TRI, getSubReg());
if (isDef() || isKill() || isDead() || isImplicit() || isUndef() ||
- isEarlyClobber()) {
+ isInternalRead() || isEarlyClobber()) {
OS << '<';
bool NeedComma = false;
if (isDef()) {
@@ -256,14 +257,26 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
NeedComma = true;
}
- if (isKill() || isDead() || isUndef()) {
+ if (isKill() || isDead() || isUndef() || isInternalRead()) {
if (NeedComma) OS << ',';
- if (isKill()) OS << "kill";
- if (isDead()) OS << "dead";
+ NeedComma = false;
+ if (isKill()) {
+ OS << "kill";
+ NeedComma = true;
+ }
+ if (isDead()) {
+ OS << "dead";
+ NeedComma = true;
+ }
if (isUndef()) {
- if (isKill() || isDead())
- OS << ',';
+ if (NeedComma) OS << ',';
OS << "undef";
+ NeedComma = true;
+ }
+ if (isInternalRead()) {
+ if (NeedComma) OS << ',';
+ OS << "internal";
+ NeedComma = true;
}
}
OS << '>';
@@ -735,6 +748,27 @@ void MachineInstr::addMemOperand(MachineFunction &MF,
MemRefsEnd = NewMemRefsEnd;
}
+bool
+MachineInstr::hasProperty(unsigned MCFlag, QueryType Type) const {
+ if (Type == IgnoreBundle || !isBundle())
+ return getDesc().getFlags() & (1 << MCFlag);
+
+ const MachineBasicBlock *MBB = getParent();
+ MachineBasicBlock::const_instr_iterator MII = *this; ++MII;
+ while (MII != MBB->end() && MII->isInsideBundle()) {
+ if (MII->getDesc().getFlags() & (1 << MCFlag)) {
+ if (Type == AnyInBundle)
+ return true;
+ } else {
+ if (Type == AllInBundle)
+ return false;
+ }
+ ++MII;
+ }
+
+ return Type == AllInBundle;
+}
+
bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
MICheckType Check) const {
// If opcodes or number of operands are not the same then the two
@@ -743,6 +777,19 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
Other->getNumOperands() != getNumOperands())
return false;
+ if (isBundle()) {
+ // Both instructions are bundles, compare MIs inside the bundle.
+ MachineBasicBlock::const_instr_iterator I1 = *this;
+ MachineBasicBlock::const_instr_iterator E1 = getParent()->instr_end();
+ MachineBasicBlock::const_instr_iterator I2 = *Other;
+ MachineBasicBlock::const_instr_iterator E2= Other->getParent()->instr_end();
+ while (++I1 != E1 && I1->isInsideBundle()) {
+ ++I2;
+ if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(I2, Check))
+ return false;
+ }
+ }
+
// Check operands to make sure they match.
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
@@ -789,6 +836,18 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
/// block, and returns it, but does not delete it.
MachineInstr *MachineInstr::removeFromParent() {
assert(getParent() && "Not embedded in a basic block!");
+
+ // If it's a bundle then remove the MIs inside the bundle as well.
+ if (isBundle()) {
+ MachineBasicBlock *MBB = getParent();
+ MachineBasicBlock::instr_iterator MII = *this; ++MII;
+ MachineBasicBlock::instr_iterator E = MBB->instr_end();
+ while (MII != E && MII->isInsideBundle()) {
+ MachineInstr *MI = &*MII;
+ ++MII;
+ MBB->remove(MI);
+ }
+ }
getParent()->remove(this);
return this;
}
@@ -798,6 +857,17 @@ MachineInstr *MachineInstr::removeFromParent() {
/// block, and deletes it.
void MachineInstr::eraseFromParent() {
assert(getParent() && "Not embedded in a basic block!");
+ // If it's a bundle then remove the MIs inside the bundle as well.
+ if (isBundle()) {
+ MachineBasicBlock *MBB = getParent();
+ MachineBasicBlock::instr_iterator MII = *this; ++MII;
+ MachineBasicBlock::instr_iterator E = MBB->instr_end();
+ while (MII != E && MII->isInsideBundle()) {
+ MachineInstr *MI = &*MII;
+ ++MII;
+ MBB->erase(MI);
+ }
+ }
getParent()->erase(this);
}
@@ -887,6 +957,20 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx,
return NULL;
}
+/// getBundleSize - Return the number of instructions inside the MI bundle.
+unsigned MachineInstr::getBundleSize() const {
+ assert(isBundle() && "Expecting a bundle");
+
+ MachineBasicBlock::const_instr_iterator I = *this;
+ unsigned Size = 0;
+ while ((++I)->isInsideBundle()) {
+ ++Size;
+ }
+ assert(Size > 1 && "Malformed bundle");
+
+ return Size;
+}
+
/// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of
/// the specific register or -1 if it is not found. It further tightens
/// the search criteria to a use that kills the register if isKill is true.
@@ -1118,6 +1202,8 @@ void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) {
/// copyPredicates - Copies predicate operand(s) from MI.
void MachineInstr::copyPredicates(const MachineInstr *MI) {
+ assert(!isBundle() && "MachineInstr::copyPredicates() can't handle bundles");
+
const MCInstrDesc &MCID = MI->getDesc();
if (!MCID.isPredicable())
return;
@@ -1159,13 +1245,13 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
AliasAnalysis *AA,
bool &SawStore) const {
// Ignore stuff that we obviously can't move.
- if (MCID->mayStore() || MCID->isCall()) {
+ if (mayStore() || isCall()) {
SawStore = true;
return false;
}
if (isLabel() || isDebugValue() ||
- MCID->isTerminator() || hasUnmodeledSideEffects())
+ isTerminator() || hasUnmodeledSideEffects())
return false;
// See if this instruction does a load. If so, we have to guarantee that the
@@ -1173,7 +1259,7 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
// destination. The check for isInvariantLoad gives the target the chance to
// classify the load as always returning a constant, e.g. a constant pool
// load.
- if (MCID->mayLoad() && !isInvariantLoad(AA))
+ if (mayLoad() && !isInvariantLoad(AA))
// Otherwise, this is a real load. If there is a store between the load and
// end of block, or if the load is volatile, we can't move it.
return !SawStore && !hasVolatileMemoryRef();
@@ -1213,9 +1299,9 @@ bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII,
/// have no volatile memory references.
bool MachineInstr::hasVolatileMemoryRef() const {
// An instruction known never to access memory won't have a volatile access.
- if (!MCID->mayStore() &&
- !MCID->mayLoad() &&
- !MCID->isCall() &&
+ if (!mayStore() &&
+ !mayLoad() &&
+ !isCall() &&
!hasUnmodeledSideEffects())
return false;
@@ -1239,7 +1325,7 @@ bool MachineInstr::hasVolatileMemoryRef() const {
/// *all* loads the instruction does are invariant (if it does multiple loads).
bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
// If the instruction doesn't load at all, it isn't an invariant load.
- if (!MCID->mayLoad())
+ if (!mayLoad())
return false;
// If the instruction has lost its memoperands, conservatively assume that
@@ -1292,7 +1378,7 @@ unsigned MachineInstr::isConstantValuePHI() const {
}
bool MachineInstr::hasUnmodeledSideEffects() const {
- if (getDesc().hasUnmodeledSideEffects())
+ if (hasProperty(MCID::UnmodeledSideEffects))
return true;
if (isInlineAsm()) {
unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
@@ -1420,7 +1506,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
// call instructions much less noisy on targets where calls clobber lots
// of registers. Don't rely on MO.isDead() because we may be called before
// LiveVariables is run, or we may be looking at a non-allocatable reg.
- if (MF && getDesc().isCall() &&
+ if (MF && isCall() &&
MO.isReg() && MO.isImplicit() && MO.isDef()) {
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
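Most call sites in this patch change from MI->getDesc().isX()/mayX() to MI->isX()/mayX(), and hasProperty() above is what makes those queries bundle-aware. The wrappers themselves live in MachineInstr.h rather than in this diff, so the following is only a sketch of their presumed shape (the flag names come from the MCID::Flag enum; the AnyInBundle default is an assumption):

    // Presumed shape of the flag queries: answered from this instruction's
    // MCInstrDesc, or aggregated over a bundle according to QueryType
    // (IgnoreBundle / AnyInBundle / AllInBundle) via hasProperty() above.
    bool mayLoad(QueryType Type = AnyInBundle) const {
      return hasProperty(MCID::MayLoad, Type);
    }
    bool isTerminator(QueryType Type = AnyInBundle) const {
      return hasProperty(MCID::Terminator, Type);
    }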
diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp
new file mode 100644
index 0000000..b766d08
--- /dev/null
+++ b/lib/CodeGen/MachineInstrBundle.cpp
@@ -0,0 +1,180 @@
+//===-- lib/CodeGen/MachineInstrBundle.cpp --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+namespace {
+ class UnpackMachineBundles : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification
+ UnpackMachineBundles() : MachineFunctionPass(ID) {
+ initializeUnpackMachineBundlesPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ };
+} // end anonymous namespace
+
+char UnpackMachineBundles::ID = 0;
+INITIALIZE_PASS(UnpackMachineBundles, "unpack-mi-bundle",
+ "Unpack machine instruction bundles", false, false)
+
+FunctionPass *llvm::createUnpackMachineBundlesPass() {
+ return new UnpackMachineBundles();
+}
+
+bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+
+ for (MachineBasicBlock::instr_iterator MII = MBB->instr_begin(),
+ MIE = MBB->instr_end(); MII != MIE; ) {
+ MachineInstr *MI = &*MII;
+
+ // Remove BUNDLE instruction and the InsideBundle flags from bundled
+ // instructions.
+ if (MI->isBundle()) {
+ while (++MII != MIE && MII->isInsideBundle()) {
+ MII->setIsInsideBundle(false);
+ for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MII->getOperand(i);
+ if (MO.isReg() && MO.isInternalRead())
+ MO.setIsInternalRead(false);
+ }
+ }
+ MI->eraseFromParent();
+
+ Changed = true;
+ continue;
+ }
+
+ ++MII;
+ }
+ }
+
+ return Changed;
+}
+
+/// FinalizeBundle - Finalize a machine instruction bundle which includes
+/// a sequence of instructions starting from FirstMI to LastMI (inclusive).
+/// This routine adds a BUNDLE instruction to represent the bundle, adds
+/// IsInternalRead markers to MachineOperands which are defined inside the
+/// bundle, and copies externally visible defs and uses to the BUNDLE
+/// instruction.
+void llvm::FinalizeBundle(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator FirstMI,
+ MachineBasicBlock::instr_iterator LastMI) {
+ const TargetMachine &TM = MBB.getParent()->getTarget();
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, FirstMI, FirstMI->getDebugLoc(),
+ TII->get(TargetOpcode::BUNDLE));
+
+ SmallVector<unsigned, 8> LocalDefs;
+ SmallSet<unsigned, 8> LocalDefSet;
+ SmallSet<unsigned, 8> DeadDefSet;
+ SmallSet<unsigned, 8> KilledDefSet;
+ SmallVector<unsigned, 8> ExternUses;
+ SmallSet<unsigned, 8> ExternUseSet;
+ SmallSet<unsigned, 8> KilledUseSet;
+ SmallSet<unsigned, 8> UndefUseSet;
+ SmallVector<MachineOperand*, 4> Defs;
+ do {
+ for (unsigned i = 0, e = FirstMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = FirstMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef()) {
+ Defs.push_back(&MO);
+ continue;
+ }
+
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ if (LocalDefSet.count(Reg)) {
+ MO.setIsInternalRead();
+ if (MO.isKill())
+ // Internal def is now killed.
+ KilledDefSet.insert(Reg);
+ } else {
+ if (ExternUseSet.insert(Reg)) {
+ ExternUses.push_back(Reg);
+ if (MO.isUndef())
+ UndefUseSet.insert(Reg);
+ }
+ if (MO.isKill())
+ // External def is now killed.
+ KilledUseSet.insert(Reg);
+ }
+ }
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ MachineOperand &MO = *Defs[i];
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (LocalDefSet.insert(Reg)) {
+ LocalDefs.push_back(Reg);
+ if (MO.isDead()) {
+ DeadDefSet.insert(Reg);
+ }
+ } else {
+ // Re-defined inside the bundle, it's no longer killed.
+ KilledDefSet.erase(Reg);
+ if (!MO.isDead())
+ // Previously defined but dead.
+ DeadDefSet.erase(Reg);
+ }
+
+ if (!MO.isDead()) {
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ if (LocalDefSet.insert(SubReg))
+ LocalDefs.push_back(SubReg);
+ }
+ }
+ }
+
+ FirstMI->setIsInsideBundle();
+ Defs.clear();
+ } while (FirstMI++ != LastMI);
+
+ SmallSet<unsigned, 8> Added;
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
+ unsigned Reg = LocalDefs[i];
+ if (Added.insert(Reg)) {
+ // If it's not live beyond end of the bundle, mark it dead.
+ bool isDead = DeadDefSet.count(Reg) || KilledDefSet.count(Reg);
+ MIB.addReg(Reg, getDefRegState(true) | getDeadRegState(isDead) |
+ getImplRegState(true));
+ }
+ }
+
+ for (unsigned i = 0, e = ExternUses.size(); i != e; ++i) {
+ unsigned Reg = ExternUses[i];
+ bool isKill = KilledUseSet.count(Reg);
+ bool isUndef = UndefUseSet.count(Reg);
+ MIB.addReg(Reg, getKillRegState(isKill) | getUndefRegState(isUndef) |
+ getImplRegState(true));
+ }
+}
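A hedged usage sketch for the new file: a pass that forms bundles (for example a VLIW packetizer) calls FinalizeBundle over one packet at a time, and the UnpackMachineBundles pass above undoes the transformation later in the pipeline. Only the FinalizeBundle call itself comes from this patch; the wrapper is illustrative:

    // Bundle one packet. FirstMI and LastMI are instr_iterators into MBB and
    // the range is inclusive, matching the FinalizeBundle contract above.
    static void bundleOnePacket(MachineBasicBlock &MBB,
                                MachineBasicBlock::instr_iterator FirstMI,
                                MachineBasicBlock::instr_iterator LastMI) {
      // Inserts a BUNDLE header before FirstMI, marks the packed instructions
      // InsideBundle, and copies externally visible defs/uses onto the BUNDLE.
      llvm::FinalizeBundle(MBB, FirstMI, LastMI);
    }

Running createUnpackMachineBundlesPass() afterwards reverses this: it clears the InsideBundle and IsInternalRead flags and erases the BUNDLE headers.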
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index e5e8c51..764429d 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -765,7 +765,7 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI) {
/// isLoadFromGOTOrConstantPool - Return true if this machine instruction
/// loads from global offset table or constant pool.
static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) {
- assert (MI.getDesc().mayLoad() && "Expected MI that loads!");
+ assert (MI.mayLoad() && "Expected MI that loads!");
for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
E = MI.memoperands_end(); I != E; ++I) {
if (const Value *V = (*I)->getValue()) {
@@ -792,7 +792,7 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
// from constant memory are not safe to speculate all the time, for example
// indexed load from a jump table.
// Stores and side effects are already checked by isSafeToMove.
- if (I.getDesc().mayLoad() && !isLoadFromGOTOrConstantPool(I) &&
+ if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) &&
!IsGuaranteedToExecute(I.getParent()))
return false;
@@ -921,7 +921,7 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
/// IsCheapInstruction - Return true if the instruction is marked "cheap" or
/// the operand latency between its def and a use is one or less.
bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
- if (MI.getDesc().isAsCheapAsAMove() || MI.isCopyLike())
+ if (MI.isAsCheapAsAMove() || MI.isCopyLike())
return true;
if (!InstrItins || InstrItins->isEmpty())
return false;
@@ -1105,7 +1105,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
// Don't unfold simple loads.
- if (MI->getDesc().canFoldAsLoad())
+ if (MI->canFoldAsLoad())
return 0;
// If not, we may be able to unfold a load and hoist that.
@@ -1141,8 +1141,9 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
assert(NewMIs.size() == 2 &&
"Unfolded a load into multiple instructions!");
MachineBasicBlock *MBB = MI->getParent();
- MBB->insert(MI, NewMIs[0]);
- MBB->insert(MI, NewMIs[1]);
+ MachineBasicBlock::iterator Pos = MI;
+ MBB->insert(Pos, NewMIs[0]);
+ MBB->insert(Pos, NewMIs[1]);
// If unfolding produced a load that wasn't loop-invariant or profitable to
// hoist, discard the new instructions and bail.
if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index 84d6df2..8cb6112 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -81,7 +81,7 @@ unsigned LookForIdenticalPHI(MachineBasicBlock *BB,
if (BB->empty())
return 0;
- MachineBasicBlock::iterator I = BB->front();
+ MachineBasicBlock::iterator I = BB->begin();
if (!I->isPHI())
return 0;
@@ -182,7 +182,7 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
return DupPHI;
// Otherwise, we do need a PHI: insert one now.
- MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin();
MachineInstr *InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB,
Loc, VRC, MRI, TII);
@@ -311,7 +311,7 @@ public:
/// Add it into the specified block and return the register.
static unsigned CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds,
MachineSSAUpdater *Updater) {
- MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin();
MachineInstr *PHI = InsertNewDef(TargetOpcode::PHI, BB, Loc,
Updater->VRC, Updater->MRI,
Updater->TII);
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 29cfb49..e47360d 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -90,6 +90,12 @@ namespace {
bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
MachineBasicBlock *DefMBB,
bool &BreakPHIEdge, bool &LocalUse) const;
+ MachineBasicBlock *FindSuccToSinkTo(MachineInstr *MI, MachineBasicBlock *MBB,
+ bool &BreakPHIEdge);
+ bool isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *SuccToSinkTo);
+
bool PerformTrivialForwardCoalescing(MachineInstr *MI,
MachineBasicBlock *MBB);
};
@@ -147,14 +153,10 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
"Only makes sense for vregs");
+ // Ignore debug uses because debug info doesn't affect the code.
if (MRI->use_nodbg_empty(Reg))
return true;
- // Ignoring debug uses is necessary so debug info doesn't affect the code.
- // This may leave a referencing dbg_value in the original block, before
- // the definition of the vreg. Dwarf generator handles this although the
- // user might not get the right info at runtime.
-
// BreakPHIEdge is true if all the uses are in the successor MBB being sunken
// into and they are all PHI nodes. In this case, machine-sink must break
// the critical edge first. e.g.
@@ -291,7 +293,7 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI,
if (!CEBCandidates.insert(std::make_pair(From, To)))
return true;
- if (!MI->isCopy() && !MI->getDesc().isAsCheapAsAMove())
+ if (!MI->isCopy() && !MI->isAsCheapAsAMove())
return true;
// MI is cheap, we probably don't want to break the critical edge for it.
@@ -401,35 +403,76 @@ static void collectDebugValues(MachineInstr *MI,
}
}
-/// SinkInstruction - Determine whether it is safe to sink the specified machine
-/// instruction out of its current block into a successor.
-bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
- // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to
- // be close to the source to make it easier to coalesce.
- if (AvoidsSinking(MI, MRI))
+/// isPostDominatedBy - Return true if A is post-dominated by B.
+static bool isPostDominatedBy(MachineBasicBlock *A, MachineBasicBlock *B) {
+
+ // FIXME - Use real post dominator.
+ if (A->succ_size() != 2)
+ return false;
+ MachineBasicBlock::succ_iterator I = A->succ_begin();
+ if (B == *I)
+ ++I;
+ MachineBasicBlock *OtherSuccBlock = *I;
+ if (OtherSuccBlock->succ_size() != 1 ||
+ *(OtherSuccBlock->succ_begin()) != B)
return false;
- // Check if it's safe to move the instruction.
- if (!MI->isSafeToMove(TII, AA, SawStore))
+ return true;
+}
+
+/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
+bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *SuccToSinkTo) {
+ assert (MI && "Invalid MachineInstr!");
+ assert (SuccToSinkTo && "Invalid SinkTo Candidate BB");
+
+ if (MBB == SuccToSinkTo)
return false;
- // FIXME: This should include support for sinking instructions within the
- // block they are currently in to shorten the live ranges. We often get
- // instructions sunk into the top of a large block, but it would be better to
- // also sink them down before their first use in the block. This xform has to
- // be careful not to *increase* register pressure though, e.g. sinking
- // "x = y + z" down if it kills y and z would increase the live ranges of y
- // and z and only shrink the live range of x.
+ // It is profitable if SuccToSinkTo does not post-dominate the current block.
+ if (!isPostDominatedBy(MBB, SuccToSinkTo))
+ return true;
+
+ // Check if the only use in the post-dominated block is a PHI instruction.
+ bool NonPHIUse = false;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
+ I != E; ++I) {
+ MachineInstr *UseInst = &*I;
+ MachineBasicBlock *UseBlock = UseInst->getParent();
+ if (UseBlock == SuccToSinkTo && !UseInst->isPHI())
+ NonPHIUse = true;
+ }
+ if (!NonPHIUse)
+ return true;
+
+ // Even when SuccToSinkTo post-dominates, sinking may still be profitable if
+ // MI can be profitably sunk into another block in the next round.
+ bool BreakPHIEdge = false;
+ // FIXME - If finding the successor is compile-time expensive, cache the results.
+ if (MachineBasicBlock *MBB2 = FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge))
+ return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2);
+
+ // If SuccToSinkTo is the final destination and it post-dominates the current
+ // block, then it is not profitable to sink MI into SuccToSinkTo.
+ return false;
+}
+
+/// FindSuccToSinkTo - Find a successor to sink this instruction to.
+MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ bool &BreakPHIEdge) {
+
+ assert (MI && "Invalid MachineInstr!");
+ assert (MBB && "Invalid MachineBasicBlock!");
// Loop over all the operands of the specified instruction. If there is
// anything we can't handle, bail out.
- MachineBasicBlock *ParentBlock = MI->getParent();
// SuccToSinkTo - This is the successor to sink this instruction to, once we
// decide.
MachineBasicBlock *SuccToSinkTo = 0;
-
- bool BreakPHIEdge = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue; // Ignore non-register operands.
@@ -443,23 +486,23 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
if (!MRI->def_empty(Reg))
- return false;
+ return NULL;
if (AllocatableSet.test(Reg))
- return false;
+ return NULL;
// Check for a def among the register's aliases too.
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
if (!MRI->def_empty(AliasReg))
- return false;
+ return NULL;
if (AllocatableSet.test(AliasReg))
- return false;
+ return NULL;
}
} else if (!MO.isDead()) {
// A def that isn't dead. We can't move it.
- return false;
+ return NULL;
}
} else {
// Virtual register uses are always safe to sink.
@@ -467,7 +510,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// If it's not safe to move defs of the register class, then abort.
if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
- return false;
+ return NULL;
// FIXME: This picks a successor to sink into based on having one
// successor that dominates all the uses. However, there are cases where
@@ -488,48 +531,79 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// If a previous operand picked a block to sink to, then this operand
// must be sinkable to the same block.
bool LocalUse = false;
- if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock,
+ if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB,
BreakPHIEdge, LocalUse))
- return false;
+ return NULL;
continue;
}
// Otherwise, we should look at all the successors and decide which one
// we should sink to.
- for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(),
- E = ParentBlock->succ_end(); SI != E; ++SI) {
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ E = MBB->succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccBlock = *SI;
bool LocalUse = false;
- if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock,
+ if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB,
BreakPHIEdge, LocalUse)) {
- SuccToSinkTo = *SI;
+ SuccToSinkTo = SuccBlock;
break;
}
if (LocalUse)
// Def is used locally, it's never safe to move this def.
- return false;
+ return NULL;
}
// If we couldn't find a block to sink to, ignore this instruction.
if (SuccToSinkTo == 0)
- return false;
+ return NULL;
+ else if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo))
+ return NULL;
}
}
- // If there are no outputs, it must have side-effects.
- if (SuccToSinkTo == 0)
- return false;
+ // It is not possible to sink an instruction into its own block. This can
+ // happen with loops.
+ if (MBB == SuccToSinkTo)
+ return NULL;
// It's not safe to sink instructions to EH landing pad. Control flow into
// landing pad is implicitly defined.
- if (SuccToSinkTo->isLandingPad())
+ if (SuccToSinkTo && SuccToSinkTo->isLandingPad())
+ return NULL;
+
+ return SuccToSinkTo;
+}
+
+/// SinkInstruction - Determine whether it is safe to sink the specified machine
+/// instruction out of its current block into a successor.
+bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
+ // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to
+ // be close to the source to make it easier to coalesce.
+ if (AvoidsSinking(MI, MRI))
return false;
- // It is not possible to sink an instruction into its own block. This can
- // happen with loops.
- if (MI->getParent() == SuccToSinkTo)
+ // Check if it's safe to move the instruction.
+ if (!MI->isSafeToMove(TII, AA, SawStore))
return false;
+ // FIXME: This should include support for sinking instructions within the
+ // block they are currently in to shorten the live ranges. We often get
+ // instructions sunk into the top of a large block, but it would be better to
+ // also sink them down before their first use in the block. This xform has to
+ // be careful not to *increase* register pressure though, e.g. sinking
+ // "x = y + z" down if it kills y and z would increase the live ranges of y
+ // and z and only shrink the live range of x.
+
+ bool BreakPHIEdge = false;
+ MachineBasicBlock *ParentBlock = MI->getParent();
+ MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge);
+
+ // If there are no outputs, it must have side-effects.
+ if (SuccToSinkTo == 0)
+ return false;
+
+
// If the instruction to move defines a dead physical register which is live
// when leaving the basic block, don't move it because it could turn into a
// "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index f231e3c..0a2c2f8 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -279,13 +279,17 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end();
MFI!=MFE; ++MFI) {
visitMachineBasicBlockBefore(MFI);
- for (MachineBasicBlock::const_iterator MBBI = MFI->begin(),
- MBBE = MFI->end(); MBBI != MBBE; ++MBBI) {
+ for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(),
+ MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) {
if (MBBI->getParent() != MFI) {
report("Bad instruction parent pointer", MFI);
*OS << "Instruction: " << *MBBI;
continue;
}
+ // Skip BUNDLE instructions for now. FIXME: We should add code to verify
+ // BUNDLEs specifically.
+ if (MBBI->isBundle())
+ continue;
visitMachineInstrBefore(MBBI);
for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I)
visitMachineOperand(&MBBI->getOperand(I), I);
@@ -435,7 +439,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
report("MBB exits via unconditional fall-through but its successor "
"differs from its CFG successor!", MBB);
}
- if (!MBB->empty() && MBB->back().getDesc().isBarrier() &&
+ if (!MBB->empty() && MBB->back().isBarrier() &&
!TII->isPredicated(&MBB->back())) {
report("MBB exits via unconditional fall-through but ends with a "
"barrier instruction!", MBB);
@@ -456,10 +460,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (MBB->empty()) {
report("MBB exits via unconditional branch but doesn't contain "
"any instructions!", MBB);
- } else if (!MBB->back().getDesc().isBarrier()) {
+ } else if (!MBB->back().isBarrier()) {
report("MBB exits via unconditional branch but doesn't end with a "
"barrier instruction!", MBB);
- } else if (!MBB->back().getDesc().isTerminator()) {
+ } else if (!MBB->back().isTerminator()) {
report("MBB exits via unconditional branch but the branch isn't a "
"terminator instruction!", MBB);
}
@@ -479,10 +483,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (MBB->empty()) {
report("MBB exits via conditional branch/fall-through but doesn't "
"contain any instructions!", MBB);
- } else if (MBB->back().getDesc().isBarrier()) {
+ } else if (MBB->back().isBarrier()) {
report("MBB exits via conditional branch/fall-through but ends with a "
"barrier instruction!", MBB);
- } else if (!MBB->back().getDesc().isTerminator()) {
+ } else if (!MBB->back().isTerminator()) {
report("MBB exits via conditional branch/fall-through but the branch "
"isn't a terminator instruction!", MBB);
}
@@ -499,10 +503,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (MBB->empty()) {
report("MBB exits via conditional branch/branch but doesn't "
"contain any instructions!", MBB);
- } else if (!MBB->back().getDesc().isBarrier()) {
+ } else if (!MBB->back().isBarrier()) {
report("MBB exits via conditional branch/branch but doesn't end with a "
"barrier instruction!", MBB);
- } else if (!MBB->back().getDesc().isTerminator()) {
+ } else if (!MBB->back().isTerminator()) {
report("MBB exits via conditional branch/branch but the branch "
"isn't a terminator instruction!", MBB);
}
@@ -555,9 +559,9 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
// Check the MachineMemOperands for basic consistency.
for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
E = MI->memoperands_end(); I != E; ++I) {
- if ((*I)->isLoad() && !MCID.mayLoad())
+ if ((*I)->isLoad() && !MI->mayLoad())
report("Missing mayLoad flag", MI);
- if ((*I)->isStore() && !MCID.mayStore())
+ if ((*I)->isStore() && !MI->mayStore())
report("Missing mayStore flag", MI);
}
@@ -575,7 +579,7 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
}
// Ensure non-terminators don't follow terminators.
- if (MCID.isTerminator()) {
+ if (MI->isTerminator()) {
if (!FirstTerminator)
FirstTerminator = MI;
} else if (FirstTerminator) {
@@ -606,7 +610,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
// Don't check if it's the last operand in a variadic instruction. See,
// e.g., LDM_RET in the arm back end.
if (MO->isReg() &&
- !(MCID.isVariadic() && MONum == MCID.getNumOperands()-1)) {
+ !(MI->isVariadic() && MONum == MCID.getNumOperands()-1)) {
if (MO->isDef() && !MCOI.isOptionalDef())
report("Explicit operand marked as def", MO, MONum);
if (MO->isImplicit())
@@ -614,7 +618,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
} else {
// ARM adds %reg0 operands to indicate predicates. We'll allow that.
- if (MO->isReg() && !MO->isImplicit() && !MCID.isVariadic() && MO->getReg())
+ if (MO->isReg() && !MO->isImplicit() && !MI->isVariadic() && MO->getReg())
report("Extra explicit operand on non-variadic instruction", MO, MONum);
}
@@ -800,11 +804,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
LiveInts && !LiveInts->isNotInMIMap(MI)) {
LiveInterval &LI = LiveStks->getInterval(MO->getIndex());
SlotIndex Idx = LiveInts->getInstructionIndex(MI);
- if (MCID.mayLoad() && !LI.liveAt(Idx.getRegSlot(true))) {
+ if (MI->mayLoad() && !LI.liveAt(Idx.getRegSlot(true))) {
report("Instruction loads from dead spill slot", MO, MONum);
*OS << "Live stack: " << LI << '\n';
}
- if (MCID.mayStore() && !LI.liveAt(Idx.getRegSlot())) {
+ if (MI->mayStore() && !LI.liveAt(Idx.getRegSlot())) {
report("Instruction stores to dead spill slot", MO, MONum);
*OS << "Live stack: " << LI << '\n';
}
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index 6994aa5..0e52496 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -410,7 +410,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
return false; // Quick exit for basic blocks without PHIs.
bool Changed = false;
- for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end();
+ for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end();
BBI != BBE && BBI->isPHI(); ++BBI) {
for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
unsigned Reg = BBI->getOperand(i).getReg();
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index bbc7ce2..2a5652a 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -292,7 +292,7 @@ bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI,
assert(Def && Src && "Malformed bitcast instruction!");
MachineInstr *DefMI = MRI->getVRegDef(Src);
- if (!DefMI || !DefMI->getDesc().isBitcast())
+ if (!DefMI || !DefMI->isBitcast())
return false;
unsigned SrcSrc = 0;
@@ -353,7 +353,7 @@ bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isMoveImmediate())
+ if (!MI->isMoveImmediate())
return false;
if (MCID.getNumDefs() != 1)
return false;
@@ -428,9 +428,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- const MCInstrDesc &MCID = MI->getDesc();
-
- if (MCID.isBitcast()) {
+ if (MI->isBitcast()) {
if (OptimizeBitcastInstr(MI, MBB)) {
// MI is deleted.
LocalMIs.erase(MI);
@@ -438,7 +436,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
MII = First ? I->begin() : llvm::next(PMII);
continue;
}
- } else if (MCID.isCompare()) {
+ } else if (MI->isCompare()) {
if (OptimizeCmpInstr(MI, MBB)) {
// MI is deleted.
LocalMIs.erase(MI);
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 7205ed6..fa832c8 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -212,7 +212,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
RegClassInfo.runOnMachineFunction(Fn);
// Check for explicit enable/disable of post-ra scheduling.
- TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = TargetSubtargetInfo::ANTIDEP_NONE;
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode =
+ TargetSubtargetInfo::ANTIDEP_NONE;
SmallVector<TargetRegisterClass*, 4> CriticalPathRCs;
if (EnablePostRAScheduler.getPosition() > 0) {
if (!EnablePostRAScheduler)
@@ -271,6 +272,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
}
I = MI;
--Count;
+ if (MI->isBundle())
+ Count -= MI->getBundleSize();
}
assert(Count == 0 && "Instruction count mismatch!");
assert((MBB->begin() == Current || CurrentCount != 0) &&
@@ -364,7 +367,7 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
KillIndices[i] = ~0u;
// Determine the live-out physregs for this block.
- if (!BB->empty() && BB->back().getDesc().isReturn()) {
+ if (!BB->empty() && BB->back().isReturn()) {
// In a return block, examine the function live-out regs.
for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
E = MRI.liveout_end(); I != E; ++I) {
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 32c9325..b4fd1cb 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -332,7 +332,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
// Skip over all terminator instructions, which are part of the return
// sequence.
MachineBasicBlock::iterator I2 = I;
- while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
+ while (I2 != MBB->begin() && (--I2)->isTerminator())
I = I2;
bool AtStart = I == MBB->begin();
@@ -426,11 +426,11 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
// Skip over all terminator instructions, which are part of the
// return sequence.
- if (! I->getDesc().isTerminator()) {
+ if (! I->isTerminator()) {
++I;
} else {
MachineBasicBlock::iterator I2 = I;
- while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
+ while (I2 != MBB->begin() && (--I2)->isTerminator())
I = I2;
}
}
@@ -698,7 +698,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
// Add epilogue to restore the callee-save registers in each exiting block
for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
// If last instruction is a return instruction, add an epilogue
- if (!I->empty() && I->back().getDesc().isReturn())
+ if (!I->empty() && I->back().isReturn())
TFI.emitEpilogue(Fn, *I);
}
@@ -706,7 +706,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
// we've been asked for it. This, when linked with a runtime with support
// for segmented stacks (libgcc is one), will result in allocating stack
// space in small chunks instead of one large contiguous block.
- if (EnableSegmentedStacks)
+ if (Fn.getTarget().Options.EnableSegmentedStacks)
TFI.adjustForSegmentedStacks(Fn);
}
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 4664a3c..c2656c5 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -748,8 +748,8 @@ void RAFast::AllocateBasicBlock() {
// and return are tail calls; do not do this for them. The tail callee need
// not take the same registers as input that it produces as output, and there
// are dependencies for its input registers elsewhere.
- if (!MBB->empty() && MBB->back().getDesc().isReturn() &&
- !MBB->back().getDesc().isCall()) {
+ if (!MBB->empty() && MBB->back().isReturn() &&
+ !MBB->back().isCall()) {
MachineInstr *Ret = &MBB->back();
for (MachineRegisterInfo::liveout_iterator
@@ -968,7 +968,7 @@ void RAFast::AllocateBasicBlock() {
}
unsigned DefOpEnd = MI->getNumOperands();
- if (MCID.isCall()) {
+ if (MI->isCall()) {
// Spill all virtregs before a call. This serves two purposes: 1. If an
// exception is thrown, the landing pad is going to expect to find
// registers in their spill slots, and 2. we don't have to wade through
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 845ee12..a053ccc 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -34,13 +34,14 @@
#include "LiveRangeEdit.h"
#include "RenderMachineFunction.h"
#include "Spiller.h"
-#include "Splitter.h"
#include "VirtRegMap.h"
#include "RegisterCoalescer.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/RegAllocPBQP.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -68,11 +69,6 @@ pbqpCoalescing("pbqp-coalescing",
cl::desc("Attempt coalescing during PBQP register allocation."),
cl::init(false), cl::Hidden);
-static cl::opt<bool>
-pbqpPreSplitting("pbqp-pre-splitting",
- cl::desc("Pre-split before PBQP register allocation."),
- cl::init(false), cl::Hidden);
-
namespace {
///
@@ -93,7 +89,6 @@ public:
initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
initializeLiveStacksPass(*PassRegistry::getPassRegistry());
initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
- initializeLoopSplitterPass(*PassRegistry::getPassRegistry());
initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
}
@@ -444,6 +439,9 @@ void PBQPBuilderWithCoalescing::addVirtRegCoalesce(
void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
+ au.setPreservesCFG();
+ au.addRequired<AliasAnalysis>();
+ au.addPreserved<AliasAnalysis>();
au.addRequired<SlotIndexes>();
au.addPreserved<SlotIndexes>();
au.addRequired<LiveIntervals>();
@@ -454,10 +452,10 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
au.addRequired<CalculateSpillWeights>();
au.addRequired<LiveStacks>();
au.addPreserved<LiveStacks>();
+ au.addRequired<MachineDominatorTree>();
+ au.addPreserved<MachineDominatorTree>();
au.addRequired<MachineLoopInfo>();
au.addPreserved<MachineLoopInfo>();
- if (pbqpPreSplitting)
- au.addRequired<LoopSplitter>();
au.addRequired<VirtRegMap>();
au.addRequired<RenderMachineFunction>();
MachineFunctionPass::getAnalysisUsage(au);
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 22d6a3b..cd181cd 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -651,8 +651,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
if (!DefMI)
return false;
- const MCInstrDesc &MCID = DefMI->getDesc();
- if (!MCID.isCommutable())
+ if (!DefMI->isCommutable())
return false;
// If DefMI is a two-address instruction then commuting it will change the
// destination register.
@@ -718,7 +717,8 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
return false;
if (NewMI != DefMI) {
LIS->ReplaceMachineInstrInMaps(DefMI, NewMI);
- MBB->insert(DefMI, NewMI);
+ MachineBasicBlock::iterator Pos = DefMI;
+ MBB->insert(Pos, NewMI);
MBB->erase(DefMI);
}
unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
@@ -809,14 +809,14 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt,
if (!DefMI)
return false;
assert(DefMI && "Defining instruction disappeared");
- const MCInstrDesc &MCID = DefMI->getDesc();
- if (!MCID.isAsCheapAsAMove())
+ if (!DefMI->isAsCheapAsAMove())
return false;
if (!TII->isTriviallyReMaterializable(DefMI, AA))
return false;
bool SawStore = false;
if (!DefMI->isSafeToMove(TII, AA, SawStore))
return false;
+ const MCInstrDesc &MCID = DefMI->getDesc();
if (MCID.getNumDefs() != 1)
return false;
if (!DefMI->isImplicitDef()) {
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 34b8ab0..4418f40 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -157,7 +157,7 @@ void ScheduleDAGInstrs::AddSchedBarrierDeps() {
MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0;
ExitSU.setInstr(ExitMI);
bool AllDepKnown = ExitMI &&
- (ExitMI->getDesc().isCall() || ExitMI->getDesc().isBarrier());
+ (ExitMI->isCall() || ExitMI->isBarrier());
if (ExitMI && AllDepKnown) {
// If it's a call or a barrier, add dependencies on the defs and uses of
// instruction.
@@ -238,13 +238,12 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
continue;
}
- const MCInstrDesc &MCID = MI->getDesc();
- assert(!MCID.isTerminator() && !MI->isLabel() &&
+ assert(!MI->isTerminator() && !MI->isLabel() &&
"Cannot schedule terminators or labels!");
// Create the SUnit for this MI.
SUnit *SU = NewSUnit(MI);
- SU->isCall = MCID.isCall();
- SU->isCommutable = MCID.isCommutable();
+ SU->isCall = MI->isCall();
+ SU->isCommutable = MI->isCommutable();
// Assign the Latency field of SU using target-provided information.
if (UnitLatencies)
@@ -278,8 +277,15 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
continue;
if (DefSU != SU &&
(Kind != SDep::Output || !MO.isDead() ||
- !DefSU->getInstr()->registerDefIsDead(Reg)))
- DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg));
+ !DefSU->getInstr()->registerDefIsDead(Reg))) {
+ if (Kind == SDep::Anti)
+ DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/Reg));
+ else {
+ unsigned AOLat = TII->getOutputLatency(InstrItins, MI, j,
+ DefSU->getInstr());
+ DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/Reg));
+ }
+ }
}
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
std::vector<SUnit *> &MemDefList = Defs[*Alias];
@@ -315,7 +321,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
if (RegUseIndex >= 0 &&
- (UseMCID.mayLoad() || UseMCID.mayStore()) &&
+ (UseMI->mayLoad() || UseMI->mayStore()) &&
(unsigned)RegUseIndex < UseMCID.getNumOperands() &&
UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
LDataLatency += SpecialAddressLatency;
@@ -419,9 +425,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// produce more precise dependence information.
#define STORE_LOAD_LATENCY 1
unsigned TrueMemOrderLatency = 0;
- if (MCID.isCall() || MI->hasUnmodeledSideEffects() ||
+ if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
(MI->hasVolatileMemoryRef() &&
- (!MCID.mayLoad() || !MI->isInvariantLoad(AA)))) {
+ (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) {
// Be conservative with these and add dependencies on all memory
// references, even those that are known to not alias.
for (std::map<const Value *, SUnit *>::iterator I =
@@ -460,7 +466,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
PendingLoads.clear();
AliasMemDefs.clear();
AliasMemUses.clear();
- } else if (MCID.mayStore()) {
+ } else if (MI->mayStore()) {
bool MayAlias = true;
TrueMemOrderLatency = STORE_LOAD_LATENCY;
if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
@@ -516,7 +522,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
/*Reg=*/0, /*isNormalMemory=*/false,
/*isMustAlias=*/false,
/*isArtificial=*/true));
- } else if (MCID.mayLoad()) {
+ } else if (MI->mayLoad()) {
bool MayAlias = true;
TrueMemOrderLatency = 0;
if (MI->isInvariantLoad(AA)) {
@@ -576,7 +582,7 @@ void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
// Simplistic target-independent heuristic: assume that loads take
// extra time.
- if (SU->getInstr()->getDesc().mayLoad())
+ if (SU->getInstr()->mayLoad())
SU->Latency += 2;
} else {
SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
@@ -658,39 +664,33 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
// EmitSchedule - Emit the machine code in scheduled order.
MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() {
- // For MachineInstr-based scheduling, we're rescheduling the instructions in
- // the block, so start by removing them from the block.
- while (Begin != InsertPos) {
- MachineBasicBlock::iterator I = Begin;
- ++Begin;
- BB->remove(I);
- }
+ Begin = InsertPos;
// If first instruction was a DBG_VALUE then put it back.
if (FirstDbgValue)
- BB->insert(InsertPos, FirstDbgValue);
+ BB->splice(InsertPos, BB, FirstDbgValue);
// Then re-insert them according to the given schedule.
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
if (SUnit *SU = Sequence[i])
- BB->insert(InsertPos, SU->getInstr());
+ BB->splice(InsertPos, BB, SU->getInstr());
else
// Null SUnit* is a noop.
EmitNoop();
- }
- // Update the Begin iterator, as the first instruction in the block
- // may have been scheduled later.
- if (!Sequence.empty())
- Begin = Sequence[0]->getInstr();
+ // Update the Begin iterator, as the first instruction in the block
+ // may have been scheduled later.
+ if (i == 0)
+ Begin = prior(InsertPos);
+ }
// Reinsert any remaining debug_values.
for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
MachineInstr *DbgValue = P.first;
- MachineInstr *OrigPrivMI = P.second;
- BB->insertAfter(OrigPrivMI, DbgValue);
+ MachineBasicBlock::iterator OrigPrivMI = P.second;
+ BB->splice(++OrigPrivMI, BB, DbgValue);
}
DbgValues.clear();
FirstDbgValue = NULL;
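
EmitSchedule above now reorders the block by splicing each scheduled instruction in front of the insertion point instead of removing everything and re-inserting it. A minimal standalone sketch of the same splice-based reordering, using std::list rather than the MachineBasicBlock API (the schedule below is made up for illustration):

    // Reorder elements of a list according to a schedule by splicing them in
    // front of an insertion point. std::list::splice, like
    // MachineBasicBlock::splice, moves nodes without invalidating iterators
    // to the other elements.
    #include <iostream>
    #include <list>
    #include <vector>

    int main() {
      std::list<char> block = {'a', 'b', 'c', 'd'};
      std::list<char>::iterator insertPos = block.end();   // schedule at the end

      // Hypothetical schedule: emit d, b, a, c in that order.
      std::vector<std::list<char>::iterator> sequence;
      for (char c : {'d', 'b', 'a', 'c'})
        for (auto it = block.begin(); it != block.end(); ++it)
          if (*it == c) { sequence.push_back(it); break; }

      for (auto it : sequence)
        block.splice(insertPos, block, it);   // move, do not copy

      for (char c : block) std::cout << c;    // prints "dbac"
      std::cout << '\n';
    }
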
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index ff4184f..6023326 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -20,13 +20,3 @@ add_llvm_library(LLVMSelectionDAG
TargetLowering.cpp
TargetSelectionDAGInfo.cpp
)
-
-add_llvm_library_dependencies(LLVMSelectionDAG
- LLVMAnalysis
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- )
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d8208a4..80cf0a8 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -180,7 +180,9 @@ namespace {
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
SDValue visitCTLZ(SDNode *N);
+ SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTTZ(SDNode *N);
+ SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTPOP(SDNode *N);
SDValue visitSELECT(SDNode *N);
SDValue visitSELECT_CC(SDNode *N);
@@ -361,6 +363,7 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
/// specified expression for the same cost as the expression itself, or 2 if we
/// can compute the negated form more cheaply than the expression itself.
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
+ const TargetOptions *Options,
unsigned Depth = 0) {
// No compile time optimizations on this type.
if (Op.getValueType() == MVT::ppcf128)
@@ -383,34 +386,39 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
return LegalOperations ? 0 : 1;
case ISD::FADD:
// FIXME: determine better conditions for this xform.
- if (!UnsafeFPMath) return 0;
+ if (!Options->UnsafeFPMath) return 0;
// fold (fsub (fadd A, B)) -> (fsub (fneg A), B)
- if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Options,
+ Depth + 1))
return V;
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
- return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, Options,
+ Depth + 1);
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
- if (!UnsafeFPMath) return 0;
+ if (!Options->UnsafeFPMath) return 0;
// fold (fneg (fsub A, B)) -> (fsub B, A)
return 1;
case ISD::FMUL:
case ISD::FDIV:
- if (HonorSignDependentRoundingFPMath()) return 0;
+ if (Options->HonorSignDependentRoundingFPMath()) return 0;
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
- if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Options,
+ Depth + 1))
return V;
- return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, Options,
+ Depth + 1);
case ISD::FP_EXTEND:
case ISD::FP_ROUND:
case ISD::FSIN:
- return isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1);
+ return isNegatibleForFree(Op.getOperand(0), LegalOperations, Options,
+ Depth + 1);
}
}
@@ -434,10 +442,11 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
}
case ISD::FADD:
// FIXME: determine better conditions for this xform.
- assert(UnsafeFPMath);
+ assert(DAG.getTarget().Options.UnsafeFPMath);
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
- if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
+ &DAG.getTarget().Options, Depth+1))
return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
@@ -449,7 +458,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
Op.getOperand(0));
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
- assert(UnsafeFPMath);
+ assert(DAG.getTarget().Options.UnsafeFPMath);
// fold (fneg (fsub 0, B)) -> B
if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
@@ -462,10 +471,11 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
case ISD::FMUL:
case ISD::FDIV:
- assert(!HonorSignDependentRoundingFPMath());
+ assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath());
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
- if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
+ &DAG.getTarget().Options, Depth+1))
return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
@@ -1070,7 +1080,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SRA: return visitSRA(N);
case ISD::SRL: return visitSRL(N);
case ISD::CTLZ: return visitCTLZ(N);
+ case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
case ISD::CTTZ: return visitCTTZ(N);
+ case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
case ISD::CTPOP: return visitCTPOP(N);
case ISD::SELECT: return visitSELECT(N);
case ISD::SELECT_CC: return visitSELECT_CC(N);
@@ -1769,7 +1781,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
N0, N1);
}
// fold (sdiv X, pow2) -> simple ops after legalize
- if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap() &&
+ if (N1C && !N1C->isNullValue() &&
(N1C->getAPIntValue().isPowerOf2() ||
(-N1C->getAPIntValue()).isPowerOf2())) {
// If dividing by powers of two is cheap, then don't perform the following
@@ -3709,6 +3721,16 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctlz_zero_undef c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
SDValue DAGCombiner::visitCTTZ(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -3719,6 +3741,16 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (cttz_zero_undef c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
SDValue DAGCombiner::visitCTPOP(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -5254,20 +5286,22 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0);
// fold (fadd A, 0) -> A
- if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ N1CFP->getValueAPF().isZero())
return N0;
// fold (fadd A, (fneg B)) -> (fsub A, B)
- if (isNegatibleForFree(N1, LegalOperations) == 2)
+ if (isNegatibleForFree(N1, LegalOperations, &DAG.getTarget().Options) == 2)
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
// fold (fadd (fneg A), B) -> (fsub B, A)
- if (isNegatibleForFree(N0, LegalOperations) == 2)
+ if (isNegatibleForFree(N0, LegalOperations, &DAG.getTarget().Options) == 2)
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
GetNegatedExpression(N0, DAG, LegalOperations));
// If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
- if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD &&
- N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
+ isa<ConstantFPSDNode>(N0.getOperand(1)))
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
N0.getOperand(1), N1));
@@ -5292,17 +5326,19 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (N0CFP && N1CFP && VT != MVT::ppcf128)
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1);
// fold (fsub A, 0) -> A
- if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N1CFP && N1CFP->getValueAPF().isZero())
return N0;
// fold (fsub 0, B) -> -B
- if (UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) {
- if (isNegatibleForFree(N1, LegalOperations))
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N0CFP && N0CFP->getValueAPF().isZero()) {
+ if (isNegatibleForFree(N1, LegalOperations, &DAG.getTarget().Options))
return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
}
// fold (fsub A, (fneg B)) -> (fadd A, B)
- if (isNegatibleForFree(N1, LegalOperations))
+ if (isNegatibleForFree(N1, LegalOperations, &DAG.getTarget().Options))
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
@@ -5329,10 +5365,12 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0);
// fold (fmul A, 0) -> 0
- if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N1CFP && N1CFP->getValueAPF().isZero())
return N1;
// fold (fmul A, 0) -> 0, vector edition.
- if (UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode()))
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ ISD::isBuildVectorAllZeros(N1.getNode()))
return N1;
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
@@ -5343,8 +5381,10 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0);
// fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations,
+ &DAG.getTarget().Options)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations,
+ &DAG.getTarget().Options)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
@@ -5355,7 +5395,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
}
// If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
- if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL &&
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N1CFP && N0.getOpcode() == ISD::FMUL &&
N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
@@ -5383,8 +5424,10 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations,
+ &DAG.getTarget().Options)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations,
+ &DAG.getTarget().Options)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
@@ -5637,7 +5680,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (isNegatibleForFree(N0, LegalOperations))
+ if (isNegatibleForFree(N0, LegalOperations, &DAG.getTarget().Options))
return GetNegatedExpression(N0, DAG, LegalOperations);
// Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
@@ -7162,19 +7205,23 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
return SDValue();
- // Combine:
- // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
- // Into:
- // indicies are equal => V1
- // otherwise => (extract_subvec V1, ExtIdx)
- //
- SDValue InsIdx = N->getOperand(1);
- SDValue ExtIdx = V->getOperand(2);
-
- if (InsIdx == ExtIdx)
- return V->getOperand(1);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT,
- V->getOperand(0), N->getOperand(1));
+ // Only handle cases where both indexes are constants with the same type.
+ ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
+
+ if (InsIdx && ExtIdx &&
+ InsIdx->getValueType(0).getSizeInBits() <= 64 &&
+ ExtIdx->getValueType(0).getSizeInBits() <= 64) {
+ // Combine:
+ // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
+ // Into:
+ // indices are equal => V1
+ // otherwise => (extract_subvec V1, ExtIdx)
+ if (InsIdx->getZExtValue() == ExtIdx->getZExtValue())
+ return V->getOperand(1);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT,
+ V->getOperand(0), N->getOperand(1));
+ }
}
return SDValue();
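
The UnsafeFPMath guards in the DAGCombiner changes above exist because -(A - B) and (B - A) differ when A == B and signed zeros are honored. A small standalone demonstration, independent of the DAG combiner:

    // When A == B, A - B is +0.0, so -(A - B) is -0.0 while B - A is +0.0.
    // The two values compare equal but carry different sign bits, which is
    // why the rewrite is only legal under unsafe FP math.
    #include <cmath>
    #include <cstdio>

    int main() {
      double A = 1.0, B = 1.0;
      double negated = -(A - B);   // -0.0
      double swapped = B - A;      // +0.0
      std::printf("-(A-B): signbit=%d   B-A: signbit=%d   equal=%d\n",
                  (int)std::signbit(negated), (int)std::signbit(swapped),
                  (int)(negated == swapped));
    }
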
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index cff37c2..b4946ec 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -62,8 +62,11 @@
#include "llvm/ADT/Statistic.h"
using namespace llvm;
-STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by target-independent selector");
-STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by target-specific selector");
+STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by "
+ "target-independent selector");
+STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
+ "target-specific selector");
+STATISTIC(NumFastIselDead, "Number of dead insts removed on failure");
/// startNewBlock - Set the current block to which generated machine
/// instructions will be appended, and clear the local CSE map.
@@ -307,6 +310,18 @@ void FastISel::recomputeInsertPt() {
++FuncInfo.InsertPt;
}
+void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E) {
+ assert (I && E && std::distance(I, E) > 0 && "Invalid iterator!");
+ while (I != E) {
+ MachineInstr *Dead = &*I;
+ ++I;
+ Dead->eraseFromParent();
+ ++NumFastIselDead;
+ }
+ recomputeInsertPt();
+}
+
FastISel::SavePoint FastISel::enterLocalValueArea() {
MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt;
DebugLoc OldDL = DL;
@@ -792,19 +807,33 @@ FastISel::SelectInstruction(const Instruction *I) {
DL = I->getDebugLoc();
+ MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt;
+
// First, try doing target-independent selection.
if (SelectOperator(I, I->getOpcode())) {
++NumFastIselSuccessIndependent;
DL = DebugLoc();
return true;
}
+ // Remove dead code. However, ignore call instructions since we've flushed
+ // the local value map and recomputed the insert point.
+ if (!isa<CallInst>(I)) {
+ recomputeInsertPt();
+ if (SavedInsertPt != FuncInfo.InsertPt)
+ removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
+ }
// Next, try calling the target to attempt to handle the instruction.
+ SavedInsertPt = FuncInfo.InsertPt;
if (TargetSelectInstruction(I)) {
++NumFastIselSuccessTarget;
DL = DebugLoc();
return true;
}
+ // Check for dead code and remove as necessary.
+ recomputeInsertPt();
+ if (SavedInsertPt != FuncInfo.InsertPt)
+ removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
DL = DebugLoc();
return false;
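
SelectInstruction above saves the insert point before each selection attempt and, if the attempt fails, erases whatever was emitted in the meantime. A minimal sketch of that save-and-roll-back pattern on a plain std::list (the names are illustrative, not the FastISel API):

    // Emit items into a list, remember where emission started, and erase the
    // half-emitted range if the "selection" attempt fails. List iterators to
    // the surviving elements remain valid throughout.
    #include <iostream>
    #include <iterator>
    #include <list>
    #include <string>

    int main() {
      std::list<std::string> block = {"keep1", "keep2"};

      // Save point: the last element before emission begins.
      std::list<std::string>::iterator savedTail = std::prev(block.end());

      // A failing selection attempt emits two temporary instructions.
      block.push_back("dead1");
      block.push_back("dead2");
      bool selectionSucceeded = false;

      if (!selectionSucceeded)
        block.erase(std::next(savedTail), block.end());   // roll back

      for (const std::string &s : block) std::cout << s << '\n';  // keep1, keep2
    }
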
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 2ff66f8..cb6fd53 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -294,7 +294,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
const TargetRegisterClass *DstRC = 0;
if (IIOpNum < II->getNumOperands())
DstRC = TII->getRegClass(*II, IIOpNum, TRI);
- assert((DstRC || (MCID.isVariadic() && IIOpNum >= MCID.getNumOperands())) &&
+ assert((DstRC || (MI->isVariadic() && IIOpNum >= MCID.getNumOperands())) &&
"Don't have operand info for this instruction!");
if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) {
unsigned NewVReg = MRI->createVirtualRegister(DstRC);
diff --git a/lib/CodeGen/SelectionDAG/LLVMBuild.txt b/lib/CodeGen/SelectionDAG/LLVMBuild.txt
index 10a849f..81d2e00 100644
--- a/lib/CodeGen/SelectionDAG/LLVMBuild.txt
+++ b/lib/CodeGen/SelectionDAG/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = SelectionDAG
parent = CodeGen
required_libraries = Analysis CodeGen Core MC Support Target TransformUtils
-
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 156cc70..75f5761 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -785,7 +785,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::FRAME_TO_ARGS_OFFSET:
case ISD::EH_SJLJ_SETJMP:
case ISD::EH_SJLJ_LONGJMP:
- case ISD::EH_SJLJ_DISPATCHSETUP:
// These operations lie about being legal: when they claim to be legal,
// they should actually be expanded.
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
@@ -2383,6 +2382,9 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
return Op;
}
+ case ISD::CTLZ_ZERO_UNDEF:
+ // This trivially expands to CTLZ.
+ return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op);
case ISD::CTLZ: {
// for now, we do this:
// x = x | (x >> 1);
@@ -2404,6 +2406,9 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
Op = DAG.getNOT(dl, Op, VT);
return DAG.getNode(ISD::CTPOP, dl, VT, Op);
}
+ case ISD::CTTZ_ZERO_UNDEF:
+ // This trivially expands to CTTZ.
+ return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op);
case ISD::CTTZ: {
// for now, we use: { return popcount(~x & (x - 1)); }
// unless the target has ctlz but not ctpop, in which case we use:
@@ -2518,7 +2523,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
switch (Node->getOpcode()) {
case ISD::CTPOP:
case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl);
Results.push_back(Tmp1);
break;
@@ -2538,7 +2545,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::PREFETCH:
case ISD::VAEND:
case ISD::EH_SJLJ_LONGJMP:
- case ISD::EH_SJLJ_DISPATCHSETUP:
// If the target didn't expand these, there's nothing to do, so just
// preserve the chain and be done.
Results.push_back(Node->getOperand(0));
@@ -3421,20 +3427,24 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
SDValue Tmp1, Tmp2, Tmp3;
switch (Node->getOpcode()) {
case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTPOP:
// Zero extend the argument.
Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
- // Perform the larger operation.
+ // Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is
+ // already the correct result.
Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
if (Node->getOpcode() == ISD::CTTZ) {
- //if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT)
+ // FIXME: This should set a bit in the zero extended value instead.
Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT),
Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT),
ISD::SETEQ);
Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2,
DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1);
- } else if (Node->getOpcode() == ISD::CTLZ) {
+ } else if (Node->getOpcode() == ISD::CTLZ ||
+ Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
// Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1,
DAG.getConstant(NVT.getSizeInBits() -
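
The CTTZ expansion above keeps relying on the identity cttz(x) == popcount(~x & (x - 1)), which also yields the bit width for x == 0 (so it implements CTTZ, not CTTZ_ZERO_UNDEF); the new zero-undef cases simply fall back to the safe CTLZ/CTTZ forms. A quick standalone check of the identity:

    // The mask ~x & (x - 1) has a 1 exactly in each trailing-zero position of
    // x, and is all ones when x == 0, so its popcount is cttz(x) with the
    // bit-width result for zero.
    #include <bitset>
    #include <cstdint>
    #include <cstdio>

    static unsigned cttzViaPopcount(uint32_t x) {
      return (unsigned)std::bitset<32>(~x & (x - 1)).count();
    }

    int main() {
      for (uint32_t x : {1u, 8u, 0x80000000u, 12u, 0u})
        std::printf("cttz(0x%08x) = %u\n", (unsigned)x, cttzViaPopcount(x));
    }
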
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index fd24238..1c02c4f 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -56,8 +56,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
case ISD::CONVERT_RNDSAT:
Res = PromoteIntRes_CONVERT_RNDSAT(N); break;
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break;
case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break;
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
case ISD::EXTRACT_VECTOR_ELT:
Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
@@ -216,7 +218,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
case TargetLowering::TypeLegal:
break;
case TargetLowering::TypePromoteInteger:
- if (NOutVT.bitsEq(NInVT))
+ if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector() && !NInVT.isVector())
// The input promotes to the same size. Convert the promoted value.
return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp));
break;
@@ -311,7 +313,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
EVT OVT = N->getValueType(0);
EVT NVT = Op.getValueType();
- Op = DAG.getNode(ISD::CTLZ, dl, NVT, Op);
+ Op = DAG.getNode(N->getOpcode(), dl, NVT, Op);
// Subtract off the extra leading bits in the bigger type.
return DAG.getNode(ISD::SUB, dl, NVT, Op,
DAG.getConstant(NVT.getSizeInBits() -
@@ -329,13 +331,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
EVT OVT = N->getValueType(0);
EVT NVT = Op.getValueType();
DebugLoc dl = N->getDebugLoc();
- // The count is the same in the promoted type except if the original
- // value was zero. This can be handled by setting the bit just off
- // the top of the original type.
- APInt TopBit(NVT.getSizeInBits(), 0);
- TopBit.setBit(OVT.getSizeInBits());
- Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
- return DAG.getNode(ISD::CTTZ, dl, NVT, Op);
+ if (N->getOpcode() == ISD::CTTZ) {
+ // The count is the same in the promoted type except if the original
+ // value was zero. This can be handled by setting the bit just off
+ // the top of the original type.
+ APInt TopBit(NVT.getSizeInBits(), 0);
+ TopBit.setBit(OVT.getSizeInBits());
+ Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
+ }
+ return DAG.getNode(N->getOpcode(), dl, NVT, Op);
}
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
@@ -1097,8 +1101,10 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break;
case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break;
case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break;
case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break;
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break;
case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
@@ -1701,8 +1707,8 @@ void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi,
DAG.getConstant(0, NVT), ISD::SETNE);
- SDValue LoLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Lo);
- SDValue HiLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Hi);
+ SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi);
Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ,
DAG.getNode(ISD::ADD, dl, NVT, LoLZ,
@@ -1731,8 +1737,8 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo,
DAG.getConstant(0, NVT), ISD::SETNE);
- SDValue LoLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Lo);
- SDValue HiLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Hi);
+ SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi);
Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoLZ,
DAG.getNode(ISD::ADD, dl, NVT, HiLZ,
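
PromoteIntRes_CTTZ and PromoteIntRes_CTLZ above widen the operand and then fix up the result: for plain CTTZ a bit just above the original width is ORed in so a zero input still yields the narrow bit width, and for CTLZ the extra leading bits of the wider type are subtracted off (the zero-undef variants can skip the CTTZ fixup). A standalone sketch of both fixups for an i8 value promoted to i32, with the wide counts written as portable loops:

    // Promote an i8 count-zeros operation to i32:
    //   cttz8(x) == cttz32(x | 0x100)            -- the extra bit caps the count at 8
    //   ctlz8(x) == ctlz32(zext(x)) - (32 - 8)   -- drop the extra leading bits
    #include <cstdint>
    #include <cstdio>

    static unsigned ctz32(uint32_t x) {           // trailing zeros, 32 for x == 0
      unsigned n = 0;
      while (n < 32 && !(x & (1u << n))) ++n;
      return n;
    }

    static unsigned clz32(uint32_t x) {           // leading zeros, 32 for x == 0
      unsigned n = 0;
      while (n < 32 && !(x & (0x80000000u >> n))) ++n;
      return n;
    }

    int main() {
      for (uint8_t v : {uint8_t(0), uint8_t(1), uint8_t(0x10), uint8_t(0x80)}) {
        unsigned cttz8 = ctz32(uint32_t(v) | 0x100u);     // 8 when v == 0
        unsigned ctlz8 = clz32(uint32_t(v)) - (32 - 8);   // 8 when v == 0
        std::printf("v=0x%02x  cttz8=%u  ctlz8=%u\n", unsigned(v), cttz8, ctlz8);
      }
    }
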
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 4e02b90..4696c0d 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -185,8 +185,10 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::SRL:
case ISD::ROTL:
case ISD::ROTR:
- case ISD::CTTZ:
case ISD::CTLZ:
+ case ISD::CTTZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTPOP:
case ISD::SELECT:
case ISD::VSELECT:
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index ad83565..7ca0d1e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -441,8 +441,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ANY_EXTEND:
case ISD::CONVERT_RNDSAT:
case ISD::CTLZ:
- case ISD::CTPOP:
case ISD::CTTZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTPOP:
case ISD::FABS:
case ISD::FCEIL:
case ISD::FCOS:
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index cd0da37..80162d7 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -102,17 +102,6 @@ static cl::opt<unsigned> AvgIPC(
"sched-avg-ipc", cl::Hidden, cl::init(1),
cl::desc("Average inst/cycle whan no target itinerary exists."));
-#ifndef NDEBUG
-namespace {
- // For sched=list-ilp, Count the number of times each factor comes into play.
- enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth,
- FactStatic, FactOther, NumFactors };
-}
-static const char *FactorName[NumFactors] =
-{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"};
-static int FactorCount[NumFactors];
-#endif //!NDEBUG
-
namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
@@ -157,6 +146,10 @@ private:
/// and similar queries.
ScheduleDAGTopologicalSort Topo;
+ // Hack to keep track of the inverse of FindCallSeqStart without more crazy
+ // DAG crawling.
+ DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
+
public:
ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
SchedulingPriorityQueue *availqueue,
@@ -308,11 +301,6 @@ void ScheduleDAGRRList::Schedule() {
DEBUG(dbgs()
<< "********** List Scheduling BB#" << BB->getNumber()
<< " '" << BB->getName() << "' **********\n");
-#ifndef NDEBUG
- for (int i = 0; i < NumFactors; ++i) {
- FactorCount[i] = 0;
- }
-#endif //!NDEBUG
CurCycle = 0;
IssueCount = 0;
@@ -322,6 +310,7 @@ void ScheduleDAGRRList::Schedule() {
// to track the virtual resource of a calling sequence.
LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL);
LiveRegGens.resize(TRI->getNumRegs() + 1, NULL);
+ CallSeqEndForStart.clear();
// Build the scheduling graph.
BuildSchedGraph(NULL);
@@ -337,11 +326,6 @@ void ScheduleDAGRRList::Schedule() {
// Execute the actual scheduling loop.
ListScheduleBottomUp();
-#ifndef NDEBUG
- for (int i = 0; i < NumFactors; ++i) {
- DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n");
- }
-#endif // !NDEBUG
AvailableQueue->releaseState();
}
@@ -545,6 +529,8 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
SUnit *Def = &SUnits[N->getNodeId()];
+ CallSeqEndForStart[Def] = SU;
+
++NumLiveRegs;
LiveRegDefs[CallResource] = Def;
LiveRegGens[CallResource] = SU;
@@ -811,7 +797,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
++NumLiveRegs;
LiveRegDefs[CallResource] = SU;
- LiveRegGens[CallResource] = NULL;
+ LiveRegGens[CallResource] = CallSeqEndForStart[SU];
}
}
@@ -832,12 +818,11 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
if (I->isAssignedRegDep()) {
+ if (!LiveRegDefs[I->getReg()])
+ ++NumLiveRegs;
// This becomes the nearest def. Note that an earlier def may still be
// pending if this is a two-address node.
LiveRegDefs[I->getReg()] = SU;
- if (!LiveRegDefs[I->getReg()]) {
- ++NumLiveRegs;
- }
if (LiveRegGens[I->getReg()] == NULL ||
I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
LiveRegGens[I->getReg()] = I->getSUnit();
@@ -2296,28 +2281,20 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
// If scheduling either one of the node will cause a pipeline stall, sort
// them according to their height.
if (LStall) {
- if (!RStall) {
- DEBUG(++FactorCount[FactStall]);
+ if (!RStall)
return 1;
- }
- if (LHeight != RHeight) {
- DEBUG(++FactorCount[FactStall]);
+ if (LHeight != RHeight)
return LHeight > RHeight ? 1 : -1;
- }
- } else if (RStall) {
- DEBUG(++FactorCount[FactStall]);
+ } else if (RStall)
return -1;
- }
// If either node is scheduling for latency, sort them by height/depth
// and latency.
if (!checkPref || (left->SchedulingPref == Sched::ILP ||
right->SchedulingPref == Sched::ILP)) {
if (DisableSchedCycles) {
- if (LHeight != RHeight) {
- DEBUG(++FactorCount[FactHeight]);
+ if (LHeight != RHeight)
return LHeight > RHeight ? 1 : -1;
- }
}
else {
// If neither instruction stalls (!LStall && !RStall) then
@@ -2326,17 +2303,14 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
int LDepth = left->getDepth() - LPenalty;
int RDepth = right->getDepth() - RPenalty;
if (LDepth != RDepth) {
- DEBUG(++FactorCount[FactDepth]);
DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
<< ") depth " << LDepth << " vs SU (" << right->NodeNum
<< ") depth " << RDepth << "\n");
return LDepth < RDepth ? 1 : -1;
}
}
- if (left->Latency != right->Latency) {
- DEBUG(++FactorCount[FactOther]);
+ if (left->Latency != right->Latency)
return left->Latency > right->Latency ? 1 : -1;
- }
}
return 0;
}
@@ -2350,7 +2324,6 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
bool LHasPhysReg = left->hasPhysRegDefs;
bool RHasPhysReg = right->hasPhysRegDefs;
if (LHasPhysReg != RHasPhysReg) {
- DEBUG(++FactorCount[FactRegUses]);
#ifndef NDEBUG
const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
#endif
@@ -2376,10 +2349,8 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
}
- if (LPriority != RPriority) {
- DEBUG(++FactorCount[FactStatic]);
+ if (LPriority != RPriority)
return LPriority > RPriority;
- }
// If one or both of the nodes are calls and their sethi-ullman numbers are
// the same, keep source order.
@@ -2412,18 +2383,14 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
// This creates more short live intervals.
unsigned LDist = closestSucc(left);
unsigned RDist = closestSucc(right);
- if (LDist != RDist) {
- DEBUG(++FactorCount[FactOther]);
+ if (LDist != RDist)
return LDist < RDist;
- }
// How many registers become live when the node is scheduled.
unsigned LScratch = calcMaxScratches(left);
unsigned RScratch = calcMaxScratches(right);
- if (LScratch != RScratch) {
- DEBUG(++FactorCount[FactOther]);
+ if (LScratch != RScratch)
return LScratch > RScratch;
- }
// Comparing latency against a call makes little sense unless the node
// is register pressure-neutral.
@@ -2438,20 +2405,15 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
return result > 0;
}
else {
- if (left->getHeight() != right->getHeight()) {
- DEBUG(++FactorCount[FactHeight]);
+ if (left->getHeight() != right->getHeight())
return left->getHeight() > right->getHeight();
- }
- if (left->getDepth() != right->getDepth()) {
- DEBUG(++FactorCount[FactDepth]);
+ if (left->getDepth() != right->getDepth())
return left->getDepth() < right->getDepth();
- }
}
assert(left->NodeQueueId && right->NodeQueueId &&
"NodeQueueId cannot be zero");
- DEBUG(++FactorCount[FactOther]);
return (left->NodeQueueId > right->NodeQueueId);
}
@@ -2511,13 +2473,11 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
// Avoid causing spills. If register pressure is high, schedule for
// register pressure reduction.
if (LHigh && !RHigh) {
- DEBUG(++FactorCount[FactPressureDiff]);
DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
<< right->NodeNum << ")\n");
return true;
}
else if (!LHigh && RHigh) {
- DEBUG(++FactorCount[FactPressureDiff]);
DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
<< left->NodeNum << ")\n");
return false;
@@ -2581,7 +2541,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
}
if (!DisableSchedRegPressure && LPDiff != RPDiff) {
- DEBUG(++FactorCount[FactPressureDiff]);
DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
<< " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
return LPDiff > RPDiff;
@@ -2590,7 +2549,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
bool LReduce = canEnableCoalescing(left);
bool RReduce = canEnableCoalescing(right);
- DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]);
if (LReduce && !RReduce) return false;
if (RReduce && !LReduce) return true;
}
@@ -2598,17 +2556,14 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
<< " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
- DEBUG(++FactorCount[FactRegUses]);
return LLiveUses < RLiveUses;
}
if (!DisableSchedStalls) {
bool LStall = BUHasStall(left, left->getHeight(), SPQ);
bool RStall = BUHasStall(right, right->getHeight(), SPQ);
- if (LStall != RStall) {
- DEBUG(++FactorCount[FactHeight]);
+ if (LStall != RStall)
return left->getHeight() > right->getHeight();
- }
}
if (!DisableSchedCriticalPath) {
@@ -2617,17 +2572,14 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
<< left->getDepth() << " != SU(" << right->NodeNum << "): "
<< right->getDepth() << "\n");
- DEBUG(++FactorCount[FactDepth]);
return left->getDepth() < right->getDepth();
}
}
if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
int spread = (int)left->getHeight() - (int)right->getHeight();
- if (std::abs(spread) > MaxReorderWindow) {
- DEBUG(++FactorCount[FactHeight]);
+ if (std::abs(spread) > MaxReorderWindow)
return left->getHeight() > right->getHeight();
- }
}
return BURRSort(left, right, SPQ);
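
One of the UnscheduleNodeBottomUp changes above moves the NumLiveRegs increment ahead of the LiveRegDefs assignment: the old order tested a slot that had just been overwritten, so the counter was never bumped. A trivial standalone illustration of why the order matters:

    // If the slot is written first, the "was it empty?" test that follows can
    // never see an empty slot, and the live-register counter is never updated.
    #include <cstdio>

    int main() {
      int dummy = 0;
      const void *def = &dummy;     // stands in for the defining SUnit
      const void *slot = nullptr;   // stands in for LiveRegDefs[Reg]
      int numLive = 0;

      // Buggy order: assign, then test.
      slot = def;
      if (!slot) ++numLive;         // never taken
      std::printf("buggy order: numLive = %d\n", numLive);

      // Fixed order: test, then assign.
      slot = nullptr;
      numLive = 0;
      if (!slot) ++numLive;         // counts the empty-to-live transition
      slot = def;
      std::printf("fixed order: numLive = %d\n", numLive);
    }
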
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 497c286..dd626e2 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -840,9 +840,9 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
}
// EntryNode could meaningfully have debug info if we can find it...
-SelectionDAG::SelectionDAG(const TargetMachine &tm)
+SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
: TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()),
- EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
+ OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
Root(getEntryNode()), Ordering(0) {
AllNodes.push_back(&EntryNode);
Ordering = new SDNodeOrdering();
@@ -1856,7 +1856,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
return;
}
case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTPOP: {
unsigned LowBits = Log2_32(BitWidth)+1;
KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
@@ -2334,7 +2336,7 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
// If we're told that NaNs won't happen, assume they won't.
- if (NoNaNsFPMath)
+ if (getTarget().Options.NoNaNsFPMath)
return true;
// If the value is a constant, we can obviously see if it is a NaN or not.
@@ -2429,8 +2431,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
case ISD::CTPOP:
return getConstant(Val.countPopulation(), VT);
case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
return getConstant(Val.countLeadingZeros(), VT);
case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
return getConstant(Val.countTrailingZeros(), VT);
}
}
@@ -2607,7 +2611,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
break;
case ISD::FNEG:
// -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
- if (UnsafeFPMath && OpOpcode == ISD::FSUB)
+ if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB)
return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1),
Operand.getNode()->getOperand(0));
if (OpOpcode == ISD::FNEG) // --X -> X
@@ -2742,7 +2746,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
- if (UnsafeFPMath) {
+ if (getTarget().Options.UnsafeFPMath) {
if (Opcode == ISD::FADD) {
// 0+x --> x
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1))
@@ -3065,7 +3069,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
- if (UnsafeFPMath)
+ if (getTarget().Options.UnsafeFPMath)
return N2;
break;
case ISD::MUL:
@@ -4914,6 +4918,20 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
return N;
}
+/// UpdadeDebugLocOnMergedSDNode - If the opt level is -O0 then it throws away
+/// the line number information on the merged node since it is not possible to
+/// preserve the information that the operation is associated with multiple
+/// lines. This will make the debugger work better at -O0, where there is a
+/// higher probability of having other instructions associated with that line.
+///
+SDNode *SelectionDAG::UpdadeDebugLocOnMergedSDNode(SDNode *N, DebugLoc OLoc) {
+ DebugLoc NLoc = N->getDebugLoc();
+ if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) && (OLoc != NLoc)) {
+ N->setDebugLoc(DebugLoc());
+ }
+ return N;
+}
+
/// MorphNodeTo - This *mutates* the specified node to have the specified
/// return type, opcode, and operands.
///
@@ -4935,7 +4953,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, VTs, Ops, NumOps);
if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
- return ON;
+ return UpdadeDebugLocOnMergedSDNode(ON, N->getDebugLoc());
}
if (!RemoveNodeFromCSEMaps(N))
@@ -5139,8 +5157,9 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps);
IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
- return cast<MachineSDNode>(E);
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ return cast<MachineSDNode>(UpdadeDebugLocOnMergedSDNode(E, DL));
+ }
}
// Allocate a new MachineSDNode.
@@ -5943,7 +5962,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::EH_RETURN: return "EH_RETURN";
case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
- case ISD::EH_SJLJ_DISPATCHSETUP: return "EH_SJLJ_DISPATCHSETUP";
case ISD::ConstantPool: return "ConstantPool";
case ISD::ExternalSymbol: return "ExternalSymbol";
case ISD::BlockAddress: return "BlockAddress";
@@ -6112,10 +6130,12 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::TRAP: return "trap";
// Bit manipulation
- case ISD::BSWAP: return "bswap";
- case ISD::CTPOP: return "ctpop";
- case ISD::CTTZ: return "cttz";
- case ISD::CTLZ: return "ctlz";
+ case ISD::BSWAP: return "bswap";
+ case ISD::CTPOP: return "ctpop";
+ case ISD::CTTZ: return "cttz";
+ case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef";
+ case ISD::CTLZ: return "ctlz";
+ case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef";
// Trampolines
case ISD::INIT_TRAMPOLINE: return "init_trampoline";
@@ -6146,6 +6166,11 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SETLT: return "setlt";
case ISD::SETLE: return "setle";
case ISD::SETNE: return "setne";
+
+ case ISD::SETTRUE: return "settrue";
+ case ISD::SETTRUE2: return "settrue2";
+ case ISD::SETFALSE: return "setfalse";
+ case ISD::SETFALSE2: return "setfalse2";
}
}
}
@@ -6554,20 +6579,15 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
const GlobalValue *GV;
int64_t GVOffset = 0;
if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
- // If GV has specified alignment, then use it. Otherwise, use the preferred
- // alignment.
- unsigned Align = GV->getAlignment();
- if (!Align) {
- if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
- if (GVar->hasInitializer()) {
- const TargetData *TD = TLI.getTargetData();
- Align = TD->getPreferredAlignment(GVar);
- }
- }
- if (!Align)
- Align = TLI.getTargetData()->getABITypeAlignment(GV->getType());
- }
- return MinAlign(Align, GVOffset);
+ unsigned PtrWidth = TLI.getPointerTy().getSizeInBits();
+ APInt AllOnes = APInt::getAllOnesValue(PtrWidth);
+ APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0);
+ llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), AllOnes,
+ KnownZero, KnownOne, TLI.getTargetData());
+ unsigned AlignBits = KnownZero.countTrailingOnes();
+ unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
+ if (Align)
+ return MinAlign(Align, GVOffset);
}
// If this is a direct reference to a stack slot, use information about the
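
InferPtrAlignment above now derives the alignment from the address bits that ComputeMaskedBits proves to be zero: the run of trailing known-zero bits gives a power-of-two alignment. A small standalone illustration of that last step (the known-zero mask here is made up rather than computed from a real global):

    // If the low AlignBits bits of an address are known to be zero, the
    // pointer is aligned to 1 << AlignBits (capped to keep the shift in range).
    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    static unsigned alignFromKnownZero(uint64_t knownZeroMask) {
      unsigned alignBits = 0;
      while (alignBits < 64 && (knownZeroMask & (1ull << alignBits)))
        ++alignBits;                                   // countTrailingOnes
      return alignBits ? 1u << std::min(31u, alignBits) : 0;
    }

    int main() {
      // Low 4 bits provably zero -> alignment 16.
      std::printf("align = %u\n", alignFromKnownZero(0xFull));
      // Nothing known about the low bits -> no alignment inferred.
      std::printf("align = %u\n", alignFromKnownZero(0x0ull));
    }
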
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 8d02350..68c9514 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -47,6 +47,7 @@
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
@@ -812,9 +813,11 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
}
}
-void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
+void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
+ const TargetLibraryInfo *li) {
AA = &aa;
GFI = gfi;
+ LibInfo = li;
TD = DAG.getTarget().getTargetData();
LPadToCallSiteMap.clear();
}
@@ -1335,6 +1338,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
Condition = getICmpCondCode(IC->getPredicate());
} else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
Condition = getFCmpCondCode(FC->getPredicate());
+ if (TM.Options.NoNaNsFPMath)
+ Condition = getFCmpCodeWithoutNaN(Condition);
} else {
Condition = ISD::SETEQ; // silence warning.
llvm_unreachable("Unknown compare instruction");
@@ -2002,7 +2007,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
}
static inline bool areJTsAllowed(const TargetLowering &TLI) {
- return !DisableJumpTables &&
+ return !TLI.getTargetMachine().Options.DisableJumpTables &&
(TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
}
@@ -2625,6 +2630,8 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Condition = getFCmpCondCode(predicate);
+ if (TM.Options.NoNaNsFPMath)
+ Condition = getFCmpCodeWithoutNaN(Condition);
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
}
@@ -3095,7 +3102,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
unsigned Amt = ElementSize.logBase2();
IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
N.getValueType(), IdxN,
- DAG.getConstant(Amt, TLI.getPointerTy()));
+ DAG.getConstant(Amt, IdxN.getValueType()));
} else {
SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy());
IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
@@ -4775,11 +4782,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getRoot(), getValue(I.getArgOperand(0))));
return 0;
}
- case Intrinsic::eh_sjlj_dispatch_setup: {
- DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
- getRoot(), getValue(I.getArgOperand(0))));
- return 0;
- }
case Intrinsic::x86_mmx_pslli_w:
case Intrinsic::x86_mmx_pslli_d:
@@ -4946,14 +4948,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return 0;
case Intrinsic::cttz: {
SDValue Arg = getValue(I.getArgOperand(0));
+ ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
EVT Ty = Arg.getValueType();
- setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg));
+ setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
+ dl, Ty, Arg));
return 0;
}
case Intrinsic::ctlz: {
SDValue Arg = getValue(I.getArgOperand(0));
+ ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
EVT Ty = Arg.getValueType();
- setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg));
+ setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF,
+ dl, Ty, Arg));
return 0;
}
case Intrinsic::ctpop: {
@@ -5064,7 +5070,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::trap: {
- StringRef TrapFuncName = getTrapFunctionName();
+ StringRef TrapFuncName = TM.Options.getTrapFunctionName();
if (TrapFuncName.empty()) {
DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
return 0;
@@ -5226,7 +5232,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// If there's a possibility that fast-isel has already selected some amount
// of the current basic block, don't emit a tail call.
- if (isTailCall && EnableFastISel)
+ if (isTailCall && TM.Options.EnableFastISel)
isTailCall = false;
std::pair<SDValue,SDValue> Result =
@@ -5510,7 +5516,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
// can't be a library call.
if (!F->hasLocalLinkage() && F->hasName()) {
StringRef Name = F->getName();
- if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") {
+ if ((LibInfo->has(LibFunc::copysign) && Name == "copysign") ||
+ (LibInfo->has(LibFunc::copysignf) && Name == "copysignf") ||
+ (LibInfo->has(LibFunc::copysignl) && Name == "copysignl")) {
if (I.getNumArgOperands() == 2 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
@@ -5521,7 +5529,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
LHS.getValueType(), LHS, RHS));
return;
}
- } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") {
+ } else if ((LibInfo->has(LibFunc::fabs) && Name == "fabs") ||
+ (LibInfo->has(LibFunc::fabsf) && Name == "fabsf") ||
+ (LibInfo->has(LibFunc::fabsl) && Name == "fabsl")) {
if (I.getNumArgOperands() == 1 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType()) {
@@ -5530,7 +5540,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
Tmp.getValueType(), Tmp));
return;
}
- } else if (Name == "sin" || Name == "sinf" || Name == "sinl") {
+ } else if ((LibInfo->has(LibFunc::sin) && Name == "sin") ||
+ (LibInfo->has(LibFunc::sinf) && Name == "sinf") ||
+ (LibInfo->has(LibFunc::sinl) && Name == "sinl")) {
if (I.getNumArgOperands() == 1 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
@@ -5540,7 +5552,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
Tmp.getValueType(), Tmp));
return;
}
- } else if (Name == "cos" || Name == "cosf" || Name == "cosl") {
+ } else if ((LibInfo->has(LibFunc::cos) && Name == "cos") ||
+ (LibInfo->has(LibFunc::cosf) && Name == "cosf") ||
+ (LibInfo->has(LibFunc::cosl) && Name == "cosl")) {
if (I.getNumArgOperands() == 1 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
@@ -5550,7 +5564,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
Tmp.getValueType(), Tmp));
return;
}
- } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") {
+ } else if ((LibInfo->has(LibFunc::sqrt) && Name == "sqrt") ||
+ (LibInfo->has(LibFunc::sqrtf) && Name == "sqrtf") ||
+ (LibInfo->has(LibFunc::sqrtl) && Name == "sqrtl")) {
if (I.getNumArgOperands() == 1 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
@@ -5560,6 +5576,83 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
Tmp.getValueType(), Tmp));
return;
}
+ } else if ((LibInfo->has(LibFunc::floor) && Name == "floor") ||
+ (LibInfo->has(LibFunc::floorf) && Name == "floorf") ||
+ (LibInfo->has(LibFunc::floorl) && Name == "floorl")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FFLOOR, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::nearbyint) && Name == "nearbyint") ||
+ (LibInfo->has(LibFunc::nearbyintf) && Name == "nearbyintf") ||
+ (LibInfo->has(LibFunc::nearbyintl) && Name == "nearbyintl")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FNEARBYINT, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::ceil) && Name == "ceil") ||
+ (LibInfo->has(LibFunc::ceilf) && Name == "ceilf") ||
+ (LibInfo->has(LibFunc::ceill) && Name == "ceill")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FCEIL, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::rint) && Name == "rint") ||
+ (LibInfo->has(LibFunc::rintf) && Name == "rintf") ||
+ (LibInfo->has(LibFunc::rintl) && Name == "rintl")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FRINT, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::trunc) && Name == "trunc") ||
+ (LibInfo->has(LibFunc::truncf) && Name == "truncf") ||
+ (LibInfo->has(LibFunc::truncl) && Name == "truncl")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FTRUNC, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::log2) && Name == "log2") ||
+ (LibInfo->has(LibFunc::log2f) && Name == "log2f") ||
+ (LibInfo->has(LibFunc::log2l) && Name == "log2l")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FLOG2, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::exp2) && Name == "exp2") ||
+ (LibInfo->has(LibFunc::exp2f) && Name == "exp2f") ||
+ (LibInfo->has(LibFunc::exp2l) && Name == "exp2l")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FEXP2, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
} else if (Name == "memcmp") {
if (visitMemCmpCall(I))
return;
@@ -6516,10 +6609,10 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
/// entry block, return true. This includes arguments used by switches, since
/// the switch may expand into multiple basic blocks.
-static bool isOnlyUsedInEntryBlock(const Argument *A) {
+static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
// With FastISel active, we may be splitting blocks, so force creation
// of virtual registers for all non-dead arguments.
- if (EnableFastISel)
+ if (FastISel)
return A->use_empty();
const BasicBlock *Entry = A->getParent()->begin();
@@ -6709,7 +6802,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
SDB->getCurDebugLoc());
SDB->setValue(I, Res);
- if (!EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
+ if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
if (LoadSDNode *LNode =
dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
if (FrameIndexSDNode *FI =
@@ -6719,7 +6812,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
// If this argument is live outside of the entry block, insert a copy from
// wherever we got it to the vreg that other BB's will reference it as.
- if (!EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
+ if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
// If we can, though, try to skip creating an unnecessary vreg.
// FIXME: This isn't very clean... it would be nice to make this more
// general. It's also subtly incompatible with the hacks FastISel
@@ -6730,7 +6823,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
continue;
}
}
- if (!isOnlyUsedInEntryBlock(I)) {
+ if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) {
FuncInfo->InitializeRegForValue(I);
SDB->CopyToExportRegsIfNeeded(I);
}
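
The visitCall changes above gate each recognized math-library name on TargetLibraryInfo before mapping it to a DAG opcode. The in-tree code uses an if/else chain; the same lookup can be sketched as a small table, where the enum and the availability flag are illustrative stand-ins rather than the LLVM types:

    // Illustrative table mapping libcall names to an opcode enum, guarded by
    // a per-target "is this library routine available?" flag.
    #include <cstdio>
    #include <map>
    #include <string>

    enum class Opcode { FFLOOR, FCEIL, FRINT };

    struct LibCall {
      Opcode Op;
      bool Available;   // stands in for LibInfo->has(LibFunc::...)
    };

    int main() {
      std::map<std::string, LibCall> table = {
        {"floorf", {Opcode::FFLOOR, true}},
        {"ceilf",  {Opcode::FCEIL,  true}},
        {"rintf",  {Opcode::FRINT,  false}},   // e.g. not available on this target
      };

      for (const char *name : {"floorf", "rintf", "sinf"}) {
        auto it = table.find(name);
        if (it != table.end() && it->second.Available)
          std::printf("%s -> lowered to node %d\n", name, int(it->second.Op));
        else
          std::printf("%s -> left as a normal call\n", name);
      }
    }
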
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 0a21ca3..5147b6c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -67,6 +67,7 @@ class SIToFPInst;
class StoreInst;
class SwitchInst;
class TargetData;
+class TargetLibraryInfo;
class TargetLowering;
class TruncInst;
class UIToFPInst;
@@ -294,6 +295,7 @@ public:
SelectionDAG &DAG;
const TargetData *TD;
AliasAnalysis *AA;
+ const TargetLibraryInfo *LibInfo;
/// SwitchCases - Vector of CaseBlock structures used to communicate
/// SwitchInst code generation information.
@@ -338,7 +340,8 @@ public:
HasTailCall(false), Context(dag.getContext()) {
}
- void init(GCFunctionInfo *gfi, AliasAnalysis &aa);
+ void init(GCFunctionInfo *gfi, AliasAnalysis &aa,
+ const TargetLibraryInfo *li);
/// clear - Clear out the current SelectionDAG and the associated
/// state and prepare this SelectionDAGBuilder object to be used
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 8cecc17..3c95059 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -41,6 +41,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -61,6 +62,81 @@ STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel");
STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG");
STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path");
+#ifndef NDEBUG
+static cl::opt<bool>
+EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden,
+ cl::desc("Enable extra verbose messages in the \"fast\" "
+ "instruction selector"));
+ // Terminators
+STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret");
+STATISTIC(NumFastIselFailBr,"Fast isel fails on Br");
+STATISTIC(NumFastIselFailSwitch,"Fast isel fails on Switch");
+STATISTIC(NumFastIselFailIndirectBr,"Fast isel fails on IndirectBr");
+STATISTIC(NumFastIselFailInvoke,"Fast isel fails on Invoke");
+STATISTIC(NumFastIselFailResume,"Fast isel fails on Resume");
+STATISTIC(NumFastIselFailUnwind,"Fast isel fails on Unwind");
+STATISTIC(NumFastIselFailUnreachable,"Fast isel fails on Unreachable");
+
+ // Standard binary operators...
+STATISTIC(NumFastIselFailAdd,"Fast isel fails on Add");
+STATISTIC(NumFastIselFailFAdd,"Fast isel fails on FAdd");
+STATISTIC(NumFastIselFailSub,"Fast isel fails on Sub");
+STATISTIC(NumFastIselFailFSub,"Fast isel fails on FSub");
+STATISTIC(NumFastIselFailMul,"Fast isel fails on Mul");
+STATISTIC(NumFastIselFailFMul,"Fast isel fails on FMul");
+STATISTIC(NumFastIselFailUDiv,"Fast isel fails on UDiv");
+STATISTIC(NumFastIselFailSDiv,"Fast isel fails on SDiv");
+STATISTIC(NumFastIselFailFDiv,"Fast isel fails on FDiv");
+STATISTIC(NumFastIselFailURem,"Fast isel fails on URem");
+STATISTIC(NumFastIselFailSRem,"Fast isel fails on SRem");
+STATISTIC(NumFastIselFailFRem,"Fast isel fails on FRem");
+
+ // Logical operators...
+STATISTIC(NumFastIselFailAnd,"Fast isel fails on And");
+STATISTIC(NumFastIselFailOr,"Fast isel fails on Or");
+STATISTIC(NumFastIselFailXor,"Fast isel fails on Xor");
+
+ // Memory instructions...
+STATISTIC(NumFastIselFailAlloca,"Fast isel fails on Alloca");
+STATISTIC(NumFastIselFailLoad,"Fast isel fails on Load");
+STATISTIC(NumFastIselFailStore,"Fast isel fails on Store");
+STATISTIC(NumFastIselFailAtomicCmpXchg,"Fast isel fails on AtomicCmpXchg");
+STATISTIC(NumFastIselFailAtomicRMW,"Fast isel fails on AtomicRMW");
+STATISTIC(NumFastIselFailFence,"Fast isel fails on Fence");
+STATISTIC(NumFastIselFailGetElementPtr,"Fast isel fails on GetElementPtr");
+
+ // Convert instructions...
+STATISTIC(NumFastIselFailTrunc,"Fast isel fails on Trunc");
+STATISTIC(NumFastIselFailZExt,"Fast isel fails on ZExt");
+STATISTIC(NumFastIselFailSExt,"Fast isel fails on SExt");
+STATISTIC(NumFastIselFailFPTrunc,"Fast isel fails on FPTrunc");
+STATISTIC(NumFastIselFailFPExt,"Fast isel fails on FPExt");
+STATISTIC(NumFastIselFailFPToUI,"Fast isel fails on FPToUI");
+STATISTIC(NumFastIselFailFPToSI,"Fast isel fails on FPToSI");
+STATISTIC(NumFastIselFailUIToFP,"Fast isel fails on UIToFP");
+STATISTIC(NumFastIselFailSIToFP,"Fast isel fails on SIToFP");
+STATISTIC(NumFastIselFailIntToPtr,"Fast isel fails on IntToPtr");
+STATISTIC(NumFastIselFailPtrToInt,"Fast isel fails on PtrToInt");
+STATISTIC(NumFastIselFailBitCast,"Fast isel fails on BitCast");
+
+ // Other instructions...
+STATISTIC(NumFastIselFailICmp,"Fast isel fails on ICmp");
+STATISTIC(NumFastIselFailFCmp,"Fast isel fails on FCmp");
+STATISTIC(NumFastIselFailPHI,"Fast isel fails on PHI");
+STATISTIC(NumFastIselFailSelect,"Fast isel fails on Select");
+STATISTIC(NumFastIselFailCall,"Fast isel fails on Call");
+STATISTIC(NumFastIselFailShl,"Fast isel fails on Shl");
+STATISTIC(NumFastIselFailLShr,"Fast isel fails on LShr");
+STATISTIC(NumFastIselFailAShr,"Fast isel fails on AShr");
+STATISTIC(NumFastIselFailVAArg,"Fast isel fails on VAArg");
+STATISTIC(NumFastIselFailExtractElement,"Fast isel fails on ExtractElement");
+STATISTIC(NumFastIselFailInsertElement,"Fast isel fails on InsertElement");
+STATISTIC(NumFastIselFailShuffleVector,"Fast isel fails on ShuffleVector");
+STATISTIC(NumFastIselFailExtractValue,"Fast isel fails on ExtractValue");
+STATISTIC(NumFastIselFailInsertValue,"Fast isel fails on InsertValue");
+STATISTIC(NumFastIselFailLandingPad,"Fast isel fails on LandingPad");
+#endif
+
static cl::opt<bool>
EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
cl::desc("Enable verbose messages in the \"fast\" "
@@ -177,7 +253,7 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SDNode *Node) const {
- assert(!MI->getDesc().hasPostISelHook() &&
+ assert(!MI->hasPostISelHook() &&
"If a target marks an instruction with 'hasPostISelHook', "
"it must implement TargetLowering::AdjustInstrPostInstrSelection!");
}
@@ -190,7 +266,7 @@ SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
CodeGenOpt::Level OL) :
MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()),
FuncInfo(new FunctionLoweringInfo(TLI)),
- CurDAG(new SelectionDAG(tm)),
+ CurDAG(new SelectionDAG(tm, OL)),
SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
GFI(),
OptLevel(OL),
@@ -198,6 +274,7 @@ SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry());
initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry());
+ initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry());
}
SelectionDAGISel::~SelectionDAGISel() {
@@ -211,6 +288,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<AliasAnalysis>();
AU.addRequired<GCModuleInfo>();
AU.addPreserved<GCModuleInfo>();
+ AU.addRequired<TargetLibraryInfo>();
if (UseMBPI && OptLevel != CodeGenOpt::None)
AU.addRequired<BranchProbabilityInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -256,9 +334,9 @@ static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) {
bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// Do some sanity-checking on the command-line options.
- assert((!EnableFastISelVerbose || EnableFastISel) &&
+ assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) &&
"-fast-isel-verbose requires -fast-isel");
- assert((!EnableFastISelAbort || EnableFastISel) &&
+ assert((!EnableFastISelAbort || TM.Options.EnableFastISel) &&
"-fast-isel-abort requires -fast-isel");
const Function &Fn = *mf.getFunction();
@@ -268,6 +346,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
RegInfo = &MF->getRegInfo();
AA = &getAnalysis<AliasAnalysis>();
+ LibInfo = &getAnalysis<TargetLibraryInfo>();
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0;
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
@@ -282,7 +361,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
else
FuncInfo->BPI = 0;
- SDB->init(GFI, *AA);
+ SDB->init(GFI, *AA, LibInfo);
SelectAllBasicBlocks(Fn);
@@ -346,7 +425,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TII.get(TargetOpcode::DBG_VALUE))
.addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug)
.addImm(Offset).addMetadata(Variable);
- EntryMBB->insertAfter(CopyUseMI, NewMI);
+ MachineBasicBlock::iterator Pos = CopyUseMI;
+ EntryMBB->insertAfter(Pos, NewMI);
}
}
}
@@ -820,10 +900,88 @@ static bool isFoldedOrDeadInstruction(const Instruction *I,
!FuncInfo->isExportedInst(I); // Exported instrs must be computed.
}
+#ifndef NDEBUG
+static void collectFailStats(const Instruction *I) {
+ switch (I->getOpcode()) {
+ default: assert (0 && "<Invalid operator> ");
+
+ // Terminators
+ case Instruction::Ret: NumFastIselFailRet++; return;
+ case Instruction::Br: NumFastIselFailBr++; return;
+ case Instruction::Switch: NumFastIselFailSwitch++; return;
+ case Instruction::IndirectBr: NumFastIselFailIndirectBr++; return;
+ case Instruction::Invoke: NumFastIselFailInvoke++; return;
+ case Instruction::Resume: NumFastIselFailResume++; return;
+ case Instruction::Unwind: NumFastIselFailUnwind++; return;
+ case Instruction::Unreachable: NumFastIselFailUnreachable++; return;
+
+ // Standard binary operators...
+ case Instruction::Add: NumFastIselFailAdd++; return;
+ case Instruction::FAdd: NumFastIselFailFAdd++; return;
+ case Instruction::Sub: NumFastIselFailSub++; return;
+ case Instruction::FSub: NumFastIselFailFSub++; return;
+ case Instruction::Mul: NumFastIselFailMul++; return;
+ case Instruction::FMul: NumFastIselFailFMul++; return;
+ case Instruction::UDiv: NumFastIselFailUDiv++; return;
+ case Instruction::SDiv: NumFastIselFailSDiv++; return;
+ case Instruction::FDiv: NumFastIselFailFDiv++; return;
+ case Instruction::URem: NumFastIselFailURem++; return;
+ case Instruction::SRem: NumFastIselFailSRem++; return;
+ case Instruction::FRem: NumFastIselFailFRem++; return;
+
+ // Logical operators...
+ case Instruction::And: NumFastIselFailAnd++; return;
+ case Instruction::Or: NumFastIselFailOr++; return;
+ case Instruction::Xor: NumFastIselFailXor++; return;
+
+ // Memory instructions...
+ case Instruction::Alloca: NumFastIselFailAlloca++; return;
+ case Instruction::Load: NumFastIselFailLoad++; return;
+ case Instruction::Store: NumFastIselFailStore++; return;
+ case Instruction::AtomicCmpXchg: NumFastIselFailAtomicCmpXchg++; return;
+ case Instruction::AtomicRMW: NumFastIselFailAtomicRMW++; return;
+ case Instruction::Fence: NumFastIselFailFence++; return;
+ case Instruction::GetElementPtr: NumFastIselFailGetElementPtr++; return;
+
+ // Convert instructions...
+ case Instruction::Trunc: NumFastIselFailTrunc++; return;
+ case Instruction::ZExt: NumFastIselFailZExt++; return;
+ case Instruction::SExt: NumFastIselFailSExt++; return;
+ case Instruction::FPTrunc: NumFastIselFailFPTrunc++; return;
+ case Instruction::FPExt: NumFastIselFailFPExt++; return;
+ case Instruction::FPToUI: NumFastIselFailFPToUI++; return;
+ case Instruction::FPToSI: NumFastIselFailFPToSI++; return;
+ case Instruction::UIToFP: NumFastIselFailUIToFP++; return;
+ case Instruction::SIToFP: NumFastIselFailSIToFP++; return;
+ case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return;
+ case Instruction::PtrToInt: NumFastIselFailPtrToInt++; return;
+ case Instruction::BitCast: NumFastIselFailBitCast++; return;
+
+ // Other instructions...
+ case Instruction::ICmp: NumFastIselFailICmp++; return;
+ case Instruction::FCmp: NumFastIselFailFCmp++; return;
+ case Instruction::PHI: NumFastIselFailPHI++; return;
+ case Instruction::Select: NumFastIselFailSelect++; return;
+ case Instruction::Call: NumFastIselFailCall++; return;
+ case Instruction::Shl: NumFastIselFailShl++; return;
+ case Instruction::LShr: NumFastIselFailLShr++; return;
+ case Instruction::AShr: NumFastIselFailAShr++; return;
+ case Instruction::VAArg: NumFastIselFailVAArg++; return;
+ case Instruction::ExtractElement: NumFastIselFailExtractElement++; return;
+ case Instruction::InsertElement: NumFastIselFailInsertElement++; return;
+ case Instruction::ShuffleVector: NumFastIselFailShuffleVector++; return;
+ case Instruction::ExtractValue: NumFastIselFailExtractValue++; return;
+ case Instruction::InsertValue: NumFastIselFailInsertValue++; return;
+ case Instruction::LandingPad: NumFastIselFailLandingPad++; return;
+ }
+ return;
+}
+#endif
+
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Initialize the Fast-ISel state, if needed.
FastISel *FastIS = 0;
- if (EnableFastISel)
+ if (TM.Options.EnableFastISel)
FastIS = TLI.createFastISel(*FuncInfo);
// Iterate over all basic blocks in the function.
@@ -931,6 +1089,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
continue;
}
+#ifndef NDEBUG
+ if (EnableFastISelVerbose2)
+ collectFailStats(Inst);
+#endif
+
// Then handle certain instructions as single-LLVM-Instruction blocks.
if (isa<CallInst>(Inst)) {
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index d7bad43..9ced1ac 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -577,16 +577,26 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
// These library functions default to expand.
- setOperationAction(ISD::FLOG , MVT::f64, Expand);
- setOperationAction(ISD::FLOG2, MVT::f64, Expand);
- setOperationAction(ISD::FLOG10,MVT::f64, Expand);
- setOperationAction(ISD::FEXP , MVT::f64, Expand);
- setOperationAction(ISD::FEXP2, MVT::f64, Expand);
- setOperationAction(ISD::FLOG , MVT::f32, Expand);
- setOperationAction(ISD::FLOG2, MVT::f32, Expand);
- setOperationAction(ISD::FLOG10,MVT::f32, Expand);
- setOperationAction(ISD::FEXP , MVT::f32, Expand);
- setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG , MVT::f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f64, Expand);
+ setOperationAction(ISD::FEXP , MVT::f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f64, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::f64, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG , MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP , MVT::f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f32, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f32, Expand);
+ setOperationAction(ISD::FRINT, MVT::f32, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Expand);
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
@@ -1473,9 +1483,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) {
SDValue InnerOp = InOp.getNode()->getOperand(0);
EVT InnerVT = InnerOp.getValueType();
- if ((APInt::getHighBitsSet(BitWidth,
- BitWidth - InnerVT.getSizeInBits()) &
- DemandedMask) == 0 &&
+ unsigned InnerBits = InnerVT.getSizeInBits();
+ if (ShAmt < InnerBits && NewMask.lshr(InnerBits) == 0 &&
isTypeDesirableForOp(ISD::SHL, InnerVT)) {
EVT ShTy = getShiftAmountTy(InnerVT);
if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
@@ -1545,7 +1554,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// always convert this into a logical shr, even if the shift amount is
// variable. The low bit of the shift cannot be an input sign bit unless
// the shift amount is >= the size of the datatype, which is undefined.
- if (DemandedMask == 1)
+ if (NewMask == 1)
return TLO.CombineTo(Op,
TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
Op.getOperand(0), Op.getOperand(1)));
@@ -1783,7 +1792,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::BITCAST:
// If this is an FP->Int bitcast and if the sign bit is the only
// thing demanded, turn this into a FGETSIGN.
- if (!Op.getValueType().isVector() &&
+ if (!TLO.LegalOperations() &&
+ !Op.getValueType().isVector() &&
!Op.getOperand(0).getValueType().isVector() &&
NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) &&
Op.getOperand(0).getValueType().isFloatingPoint()) {
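The FFLOOR/FNEARBYINT/FCEIL/FRINT/FTRUNC entries added above only change the default action; a target whose ISA handles these natively opts back in from its own TargetLowering constructor. A minimal sketch (hypothetical target, f32 only):

// Sketch only: statements inside a hypothetical target's TargetLowering
// constructor. Anything left at Expand still lowers to the matching libm call.
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
setOperationAction(ISD::FRINT,  MVT::f32, Legal);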
diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp
index 13f269e..70fcf55 100644
--- a/lib/CodeGen/ShrinkWrapping.cpp
+++ b/lib/CodeGen/ShrinkWrapping.cpp
@@ -124,7 +124,7 @@ MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) {
}
bool PEI::isReturnBlock(MachineBasicBlock* MBB) {
- return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn());
+ return (MBB && !MBB->empty() && MBB->back().isReturn());
}
// Initialize shrink wrapping DFA sets, called before iterations.
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index c865192..8e2f74f 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -69,6 +69,8 @@ namespace {
private:
bool setupEntryBlockAndCallSites(Function &F);
+ void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
+ Value *SelVal);
Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads);
void lowerIncomingArguments(Function &F);
void lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst*> Invokes);
@@ -138,6 +140,38 @@ static void MarkBlocksLiveIn(BasicBlock *BB,
MarkBlocksLiveIn(*PI, LiveBBs);
}
+/// substituteLPadValues - Substitute the values returned by the landingpad
+/// instruction with those returned by the personality function.
+void SjLjEHPass::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
+ Value *SelVal) {
+ SmallVector<Value*, 8> UseWorkList(LPI->use_begin(), LPI->use_end());
+ while (!UseWorkList.empty()) {
+ Value *Val = UseWorkList.pop_back_val();
+ ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Val);
+ if (!EVI) continue;
+ if (EVI->getNumIndices() != 1) continue;
+ if (*EVI->idx_begin() == 0)
+ EVI->replaceAllUsesWith(ExnVal);
+ else if (*EVI->idx_begin() == 1)
+ EVI->replaceAllUsesWith(SelVal);
+ if (EVI->getNumUses() == 0)
+ EVI->eraseFromParent();
+ }
+
+ if (LPI->getNumUses() == 0) return;
+
+ // There are still some uses of LPI. Construct an aggregate with the exception
+ // values and replace the LPI with that aggregate.
+ Type *LPadType = LPI->getType();
+ Value *LPadVal = UndefValue::get(LPadType);
+ IRBuilder<>
+ Builder(llvm::next(BasicBlock::iterator(cast<Instruction>(SelVal))));
+ LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val");
+ LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val");
+
+ LPI->replaceAllUsesWith(LPadVal);
+}
+
/// setupFunctionContext - Allocate the function context on the stack and fill
/// it with all of the data that we know at this point.
Value *SjLjEHPass::
@@ -189,12 +223,7 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
ExnVal = Builder.CreateIntToPtr(ExnVal, Type::getInt8PtrTy(F.getContext()));
Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val");
- Type *LPadType = LPI->getType();
- Value *LPadVal = UndefValue::get(LPadType);
- LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val");
- LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val");
-
- LPI->replaceAllUsesWith(LPadVal);
+ substituteLPadValues(LPI, ExnVal, SelVal);
}
// Personality function
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 751d604..c086073 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -80,7 +80,7 @@ SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) {
for (MachineBasicBlock::const_iterator I = MBB->end(), E = MBB->begin();
I != E;) {
--I;
- if (I->getDesc().isCall()) {
+ if (I->isCall()) {
LSP.second = LIS.getInstructionIndex(I);
break;
}
diff --git a/lib/CodeGen/Splitter.cpp b/lib/CodeGen/Splitter.cpp
deleted file mode 100644
index 16cf9b8..0000000
--- a/lib/CodeGen/Splitter.cpp
+++ /dev/null
@@ -1,827 +0,0 @@
-//===-- llvm/CodeGen/Splitter.cpp - Splitter -----------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "loopsplitter"
-
-#include "Splitter.h"
-
-#include "llvm/Module.h"
-#include "llvm/CodeGen/CalcSpillWeights.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveStackAnalysis.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/SlotIndexes.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-
-using namespace llvm;
-
-char LoopSplitter::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopSplitter, "loop-splitting",
- "Split virtual regists across loop boundaries.", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_END(LoopSplitter, "loop-splitting",
- "Split virtual regists across loop boundaries.", false, false)
-
-namespace llvm {
-
- class StartSlotComparator {
- public:
- StartSlotComparator(LiveIntervals &lis) : lis(lis) {}
- bool operator()(const MachineBasicBlock *mbb1,
- const MachineBasicBlock *mbb2) const {
- return lis.getMBBStartIdx(mbb1) < lis.getMBBStartIdx(mbb2);
- }
- private:
- LiveIntervals &lis;
- };
-
- class LoopSplit {
- public:
- LoopSplit(LoopSplitter &ls, LiveInterval &li, MachineLoop &loop)
- : ls(ls), li(li), loop(loop), valid(true), inSplit(false), newLI(0) {
- assert(TargetRegisterInfo::isVirtualRegister(li.reg) &&
- "Cannot split physical registers.");
- }
-
- LiveInterval& getLI() const { return li; }
-
- MachineLoop& getLoop() const { return loop; }
-
- bool isValid() const { return valid; }
-
- bool isWorthwhile() const { return valid && (inSplit || !outSplits.empty()); }
-
- void invalidate() { valid = false; }
-
- void splitIncoming() { inSplit = true; }
-
- void splitOutgoing(MachineLoop::Edge &edge) { outSplits.insert(edge); }
-
- void addLoopInstr(MachineInstr *i) { loopInstrs.push_back(i); }
-
- void apply() {
- assert(valid && "Attempt to apply invalid split.");
- applyIncoming();
- applyOutgoing();
- copyRanges();
- renameInside();
- }
-
- private:
- LoopSplitter &ls;
- LiveInterval &li;
- MachineLoop &loop;
- bool valid, inSplit;
- std::set<MachineLoop::Edge> outSplits;
- std::vector<MachineInstr*> loopInstrs;
-
- LiveInterval *newLI;
- std::map<VNInfo*, VNInfo*> vniMap;
-
- LiveInterval* getNewLI() {
- if (newLI == 0) {
- const TargetRegisterClass *trc = ls.mri->getRegClass(li.reg);
- unsigned vreg = ls.mri->createVirtualRegister(trc);
- newLI = &ls.lis->getOrCreateInterval(vreg);
- }
- return newLI;
- }
-
- VNInfo* getNewVNI(VNInfo *oldVNI) {
- VNInfo *newVNI = vniMap[oldVNI];
-
- if (newVNI == 0) {
- newVNI = getNewLI()->createValueCopy(oldVNI,
- ls.lis->getVNInfoAllocator());
- vniMap[oldVNI] = newVNI;
- }
-
- return newVNI;
- }
-
- void applyIncoming() {
- if (!inSplit) {
- return;
- }
-
- MachineBasicBlock *preHeader = loop.getLoopPreheader();
- if (preHeader == 0) {
- assert(ls.canInsertPreHeader(loop) &&
- "Can't insert required preheader.");
- preHeader = &ls.insertPreHeader(loop);
- }
-
- LiveRange *preHeaderRange =
- ls.lis->findExitingRange(li, preHeader);
- assert(preHeaderRange != 0 && "Range not live into preheader.");
-
- // Insert the new copy.
- MachineInstr *copy = BuildMI(*preHeader,
- preHeader->getFirstTerminator(),
- DebugLoc(),
- ls.tii->get(TargetOpcode::COPY))
- .addReg(getNewLI()->reg, RegState::Define)
- .addReg(li.reg, RegState::Kill);
-
- ls.lis->InsertMachineInstrInMaps(copy);
-
- SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getRegSlot();
-
- VNInfo *newVal = getNewVNI(preHeaderRange->valno);
- newVal->def = copyDefIdx;
- newVal->setCopy(copy);
- li.removeRange(copyDefIdx, ls.lis->getMBBEndIdx(preHeader), true);
-
- getNewLI()->addRange(LiveRange(copyDefIdx,
- ls.lis->getMBBEndIdx(preHeader),
- newVal));
- }
-
- void applyOutgoing() {
-
- for (std::set<MachineLoop::Edge>::iterator osItr = outSplits.begin(),
- osEnd = outSplits.end();
- osItr != osEnd; ++osItr) {
- MachineLoop::Edge edge = *osItr;
- MachineBasicBlock *outBlock = edge.second;
- if (ls.isCriticalEdge(edge)) {
- assert(ls.canSplitEdge(edge) && "Unsplitable critical edge.");
- outBlock = &ls.splitEdge(edge, loop);
- }
- LiveRange *outRange = ls.lis->findEnteringRange(li, outBlock);
- assert(outRange != 0 && "No exiting range?");
-
- MachineInstr *copy = BuildMI(*outBlock, outBlock->begin(),
- DebugLoc(),
- ls.tii->get(TargetOpcode::COPY))
- .addReg(li.reg, RegState::Define)
- .addReg(getNewLI()->reg, RegState::Kill);
-
- ls.lis->InsertMachineInstrInMaps(copy);
-
- SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getRegSlot();
-
- // Blow away output range definition.
- outRange->valno->def = ls.lis->getInvalidIndex();
- li.removeRange(ls.lis->getMBBStartIdx(outBlock), copyDefIdx);
-
- SlotIndex newDefIdx = ls.lis->getMBBStartIdx(outBlock);
- assert(ls.lis->getInstructionFromIndex(newDefIdx) == 0 &&
- "PHI def index points at actual instruction.");
- VNInfo *newVal =
- getNewLI()->getNextValue(newDefIdx, 0, ls.lis->getVNInfoAllocator());
-
- getNewLI()->addRange(LiveRange(ls.lis->getMBBStartIdx(outBlock),
- copyDefIdx, newVal));
-
- }
- }
-
- void copyRange(LiveRange &lr) {
- std::pair<bool, LoopSplitter::SlotPair> lsr =
- ls.getLoopSubRange(lr, loop);
-
- if (!lsr.first)
- return;
-
- LiveRange loopRange(lsr.second.first, lsr.second.second,
- getNewVNI(lr.valno));
-
- li.removeRange(loopRange.start, loopRange.end, true);
-
- getNewLI()->addRange(loopRange);
- }
-
- void copyRanges() {
- for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(),
- iEnd = loopInstrs.end();
- iItr != iEnd; ++iItr) {
- MachineInstr &instr = **iItr;
- SlotIndex instrIdx = ls.lis->getInstructionIndex(&instr);
- if (instr.modifiesRegister(li.reg, 0)) {
- LiveRange *defRange =
- li.getLiveRangeContaining(instrIdx.getRegSlot());
- if (defRange != 0) // May have caught this already.
- copyRange(*defRange);
- }
- if (instr.readsRegister(li.reg, 0)) {
- LiveRange *useRange =
- li.getLiveRangeContaining(instrIdx.getRegSlot(true));
- if (useRange != 0) { // May have caught this already.
- copyRange(*useRange);
- }
- }
- }
-
- for (MachineLoop::block_iterator bbItr = loop.block_begin(),
- bbEnd = loop.block_end();
- bbItr != bbEnd; ++bbItr) {
- MachineBasicBlock &loopBlock = **bbItr;
- LiveRange *enteringRange =
- ls.lis->findEnteringRange(li, &loopBlock);
- if (enteringRange != 0) {
- copyRange(*enteringRange);
- }
- }
- }
-
- void renameInside() {
- for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(),
- iEnd = loopInstrs.end();
- iItr != iEnd; ++iItr) {
- MachineInstr &instr = **iItr;
- for (unsigned i = 0; i < instr.getNumOperands(); ++i) {
- MachineOperand &mop = instr.getOperand(i);
- if (mop.isReg() && mop.getReg() == li.reg) {
- mop.setReg(getNewLI()->reg);
- }
- }
- }
- }
-
- };
-
- void LoopSplitter::getAnalysisUsage(AnalysisUsage &au) const {
- au.addRequired<MachineDominatorTree>();
- au.addPreserved<MachineDominatorTree>();
- au.addRequired<MachineLoopInfo>();
- au.addPreserved<MachineLoopInfo>();
- au.addPreservedID(RegisterCoalescerPassID);
- au.addPreserved<CalculateSpillWeights>();
- au.addPreserved<LiveStacks>();
- au.addRequired<SlotIndexes>();
- au.addPreserved<SlotIndexes>();
- au.addRequired<LiveIntervals>();
- au.addPreserved<LiveIntervals>();
- MachineFunctionPass::getAnalysisUsage(au);
- }
-
- bool LoopSplitter::runOnMachineFunction(MachineFunction &fn) {
-
- mf = &fn;
- mri = &mf->getRegInfo();
- tii = mf->getTarget().getInstrInfo();
- tri = mf->getTarget().getRegisterInfo();
- sis = &getAnalysis<SlotIndexes>();
- lis = &getAnalysis<LiveIntervals>();
- mli = &getAnalysis<MachineLoopInfo>();
- mdt = &getAnalysis<MachineDominatorTree>();
-
- fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." +
- mf->getFunction()->getName().str();
-
- dbgs() << "Splitting " << mf->getFunction()->getName() << ".";
-
- dumpOddTerminators();
-
-// dbgs() << "----------------------------------------\n";
-// lis->dump();
-// dbgs() << "----------------------------------------\n";
-
-// std::deque<MachineLoop*> loops;
-// std::copy(mli->begin(), mli->end(), std::back_inserter(loops));
-// dbgs() << "Loops:\n";
-// while (!loops.empty()) {
-// MachineLoop &loop = *loops.front();
-// loops.pop_front();
-// std::copy(loop.begin(), loop.end(), std::back_inserter(loops));
-
-// dumpLoopInfo(loop);
-// }
-
- //lis->dump();
- //exit(0);
-
- // Setup initial intervals.
- for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end();
- liItr != liEnd; ++liItr) {
- LiveInterval *li = liItr->second;
-
- if (TargetRegisterInfo::isVirtualRegister(li->reg) &&
- !lis->intervalIsInOneMBB(*li)) {
- intervals.push_back(li);
- }
- }
-
- processIntervals();
-
- intervals.clear();
-
-// dbgs() << "----------------------------------------\n";
-// lis->dump();
-// dbgs() << "----------------------------------------\n";
-
- dumpOddTerminators();
-
- //exit(1);
-
- return false;
- }
-
- void LoopSplitter::releaseMemory() {
- fqn.clear();
- intervals.clear();
- loopRangeMap.clear();
- }
-
- void LoopSplitter::dumpOddTerminators() {
- for (MachineFunction::iterator bbItr = mf->begin(), bbEnd = mf->end();
- bbItr != bbEnd; ++bbItr) {
- MachineBasicBlock *mbb = &*bbItr;
- MachineBasicBlock *a = 0, *b = 0;
- SmallVector<MachineOperand, 4> c;
- if (tii->AnalyzeBranch(*mbb, a, b, c)) {
- dbgs() << "MBB#" << mbb->getNumber() << " has multiway terminator.\n";
- dbgs() << " Terminators:\n";
- for (MachineBasicBlock::iterator iItr = mbb->begin(), iEnd = mbb->end();
- iItr != iEnd; ++iItr) {
- MachineInstr *instr= &*iItr;
- dbgs() << " " << *instr << "";
- }
- dbgs() << "\n Listed successors: [ ";
- for (MachineBasicBlock::succ_iterator sItr = mbb->succ_begin(), sEnd = mbb->succ_end();
- sItr != sEnd; ++sItr) {
- MachineBasicBlock *succMBB = *sItr;
- dbgs() << succMBB->getNumber() << " ";
- }
- dbgs() << "]\n\n";
- }
- }
- }
-
- void LoopSplitter::dumpLoopInfo(MachineLoop &loop) {
- MachineBasicBlock &headerBlock = *loop.getHeader();
- typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList;
- ExitEdgesList exitEdges;
- loop.getExitEdges(exitEdges);
-
- dbgs() << " Header: BB#" << headerBlock.getNumber() << ", Contains: [ ";
- for (std::vector<MachineBasicBlock*>::const_iterator
- subBlockItr = loop.getBlocks().begin(),
- subBlockEnd = loop.getBlocks().end();
- subBlockItr != subBlockEnd; ++subBlockItr) {
- MachineBasicBlock &subBlock = **subBlockItr;
- dbgs() << "BB#" << subBlock.getNumber() << " ";
- }
- dbgs() << "], Exit edges: [ ";
- for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(),
- exitEdgeEnd = exitEdges.end();
- exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) {
- MachineLoop::Edge &exitEdge = *exitEdgeItr;
- dbgs() << "(MBB#" << exitEdge.first->getNumber()
- << ", MBB#" << exitEdge.second->getNumber() << ") ";
- }
- dbgs() << "], Sub-Loop Headers: [ ";
- for (MachineLoop::iterator subLoopItr = loop.begin(),
- subLoopEnd = loop.end();
- subLoopItr != subLoopEnd; ++subLoopItr) {
- MachineLoop &subLoop = **subLoopItr;
- MachineBasicBlock &subLoopBlock = *subLoop.getHeader();
- dbgs() << "BB#" << subLoopBlock.getNumber() << " ";
- }
- dbgs() << "]\n";
- }
-
- void LoopSplitter::updateTerminators(MachineBasicBlock &mbb) {
- mbb.updateTerminator();
-
- for (MachineBasicBlock::iterator miItr = mbb.begin(), miEnd = mbb.end();
- miItr != miEnd; ++miItr) {
- if (lis->isNotInMIMap(miItr)) {
- lis->InsertMachineInstrInMaps(miItr);
- }
- }
- }
-
- bool LoopSplitter::canInsertPreHeader(MachineLoop &loop) {
- MachineBasicBlock *header = loop.getHeader();
- MachineBasicBlock *a = 0, *b = 0;
- SmallVector<MachineOperand, 4> c;
-
- for (MachineBasicBlock::pred_iterator pbItr = header->pred_begin(),
- pbEnd = header->pred_end();
- pbItr != pbEnd; ++pbItr) {
- MachineBasicBlock *predBlock = *pbItr;
- if (!!tii->AnalyzeBranch(*predBlock, a, b, c)) {
- return false;
- }
- }
-
- MachineFunction::iterator headerItr(header);
- if (headerItr == mf->begin())
- return true;
- MachineBasicBlock *headerLayoutPred = llvm::prior(headerItr);
- assert(headerLayoutPred != 0 && "Header should have layout pred.");
-
- return (!tii->AnalyzeBranch(*headerLayoutPred, a, b, c));
- }
-
- MachineBasicBlock& LoopSplitter::insertPreHeader(MachineLoop &loop) {
- assert(loop.getLoopPreheader() == 0 && "Loop already has preheader.");
-
- MachineBasicBlock &header = *loop.getHeader();
-
- // Save the preds - we'll need to update them once we insert the preheader.
- typedef std::set<MachineBasicBlock*> HeaderPreds;
- HeaderPreds headerPreds;
-
- for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(),
- predEnd = header.pred_end();
- predItr != predEnd; ++predItr) {
- if (!loop.contains(*predItr))
- headerPreds.insert(*predItr);
- }
-
- assert(!headerPreds.empty() && "No predecessors for header?");
-
- //dbgs() << fqn << " MBB#" << header.getNumber() << " inserting preheader...";
-
- MachineBasicBlock *preHeader =
- mf->CreateMachineBasicBlock(header.getBasicBlock());
-
- assert(preHeader != 0 && "Failed to create pre-header.");
-
- mf->insert(header, preHeader);
-
- for (HeaderPreds::iterator hpItr = headerPreds.begin(),
- hpEnd = headerPreds.end();
- hpItr != hpEnd; ++hpItr) {
- assert(*hpItr != 0 && "How'd a null predecessor get into this set?");
- MachineBasicBlock &hp = **hpItr;
- hp.ReplaceUsesOfBlockWith(&header, preHeader);
- }
- preHeader->addSuccessor(&header);
-
- MachineBasicBlock *oldLayoutPred =
- llvm::prior(MachineFunction::iterator(preHeader));
- if (oldLayoutPred != 0) {
- updateTerminators(*oldLayoutPred);
- }
-
- lis->InsertMBBInMaps(preHeader);
-
- if (MachineLoop *parentLoop = loop.getParentLoop()) {
- assert(parentLoop->getHeader() != loop.getHeader() &&
- "Parent loop has same header?");
- parentLoop->addBasicBlockToLoop(preHeader, mli->getBase());
-
- // Invalidate all parent loop ranges.
- while (parentLoop != 0) {
- loopRangeMap.erase(parentLoop);
- parentLoop = parentLoop->getParentLoop();
- }
- }
-
- for (LiveIntervals::iterator liItr = lis->begin(),
- liEnd = lis->end();
- liItr != liEnd; ++liItr) {
- LiveInterval &li = *liItr->second;
-
- // Is this safe for physregs?
- // TargetRegisterInfo::isPhysicalRegister(li.reg) ||
- if (!lis->isLiveInToMBB(li, &header))
- continue;
-
- if (lis->isLiveInToMBB(li, preHeader)) {
- assert(lis->isLiveOutOfMBB(li, preHeader) &&
- "Range terminates in newly added preheader?");
- continue;
- }
-
- bool insertRange = false;
-
- for (MachineBasicBlock::pred_iterator predItr = preHeader->pred_begin(),
- predEnd = preHeader->pred_end();
- predItr != predEnd; ++predItr) {
- MachineBasicBlock *predMBB = *predItr;
- if (lis->isLiveOutOfMBB(li, predMBB)) {
- insertRange = true;
- break;
- }
- }
-
- if (!insertRange)
- continue;
-
- SlotIndex newDefIdx = lis->getMBBStartIdx(preHeader);
- assert(lis->getInstructionFromIndex(newDefIdx) == 0 &&
- "PHI def index points at actual instruction.");
- VNInfo *newVal = li.getNextValue(newDefIdx, 0, lis->getVNInfoAllocator());
- li.addRange(LiveRange(lis->getMBBStartIdx(preHeader),
- lis->getMBBEndIdx(preHeader),
- newVal));
- }
-
-
- //dbgs() << "Dumping SlotIndexes:\n";
- //sis->dump();
-
- //dbgs() << "done. (Added MBB#" << preHeader->getNumber() << ")\n";
-
- return *preHeader;
- }
-
- bool LoopSplitter::isCriticalEdge(MachineLoop::Edge &edge) {
- assert(edge.first->succ_size() > 1 && "Non-sensical edge.");
- if (edge.second->pred_size() > 1)
- return true;
- return false;
- }
-
- bool LoopSplitter::canSplitEdge(MachineLoop::Edge &edge) {
- MachineFunction::iterator outBlockItr(edge.second);
- if (outBlockItr == mf->begin())
- return true;
- MachineBasicBlock *outBlockLayoutPred = llvm::prior(outBlockItr);
- assert(outBlockLayoutPred != 0 && "Should have a layout pred if out!=begin.");
- MachineBasicBlock *a = 0, *b = 0;
- SmallVector<MachineOperand, 4> c;
- return (!tii->AnalyzeBranch(*outBlockLayoutPred, a, b, c) &&
- !tii->AnalyzeBranch(*edge.first, a, b, c));
- }
-
- MachineBasicBlock& LoopSplitter::splitEdge(MachineLoop::Edge &edge,
- MachineLoop &loop) {
-
- MachineBasicBlock &inBlock = *edge.first;
- MachineBasicBlock &outBlock = *edge.second;
-
- assert((inBlock.succ_size() > 1) && (outBlock.pred_size() > 1) &&
- "Splitting non-critical edge?");
-
- //dbgs() << fqn << " Splitting edge (MBB#" << inBlock.getNumber()
- // << " -> MBB#" << outBlock.getNumber() << ")...";
-
- MachineBasicBlock *splitBlock =
- mf->CreateMachineBasicBlock();
-
- assert(splitBlock != 0 && "Failed to create split block.");
-
- mf->insert(&outBlock, splitBlock);
-
- inBlock.ReplaceUsesOfBlockWith(&outBlock, splitBlock);
- splitBlock->addSuccessor(&outBlock);
-
- MachineBasicBlock *oldLayoutPred =
- llvm::prior(MachineFunction::iterator(splitBlock));
- if (oldLayoutPred != 0) {
- updateTerminators(*oldLayoutPred);
- }
-
- lis->InsertMBBInMaps(splitBlock);
-
- loopRangeMap.erase(&loop);
-
- MachineLoop *splitParentLoop = loop.getParentLoop();
- while (splitParentLoop != 0 &&
- !splitParentLoop->contains(&outBlock)) {
- splitParentLoop = splitParentLoop->getParentLoop();
- }
-
- if (splitParentLoop != 0) {
- assert(splitParentLoop->contains(&loop) &&
- "Split-block parent doesn't contain original loop?");
- splitParentLoop->addBasicBlockToLoop(splitBlock, mli->getBase());
-
- // Invalidate all parent loop ranges.
- while (splitParentLoop != 0) {
- loopRangeMap.erase(splitParentLoop);
- splitParentLoop = splitParentLoop->getParentLoop();
- }
- }
-
-
- for (LiveIntervals::iterator liItr = lis->begin(),
- liEnd = lis->end();
- liItr != liEnd; ++liItr) {
- LiveInterval &li = *liItr->second;
- bool intersects = lis->isLiveOutOfMBB(li, &inBlock) &&
- lis->isLiveInToMBB(li, &outBlock);
- if (lis->isLiveInToMBB(li, splitBlock)) {
- if (!intersects) {
- li.removeRange(lis->getMBBStartIdx(splitBlock),
- lis->getMBBEndIdx(splitBlock), true);
- }
- } else if (intersects) {
- SlotIndex newDefIdx = lis->getMBBStartIdx(splitBlock);
- assert(lis->getInstructionFromIndex(newDefIdx) == 0 &&
- "PHI def index points at actual instruction.");
- VNInfo *newVal = li.getNextValue(newDefIdx, 0,
- lis->getVNInfoAllocator());
- li.addRange(LiveRange(lis->getMBBStartIdx(splitBlock),
- lis->getMBBEndIdx(splitBlock),
- newVal));
- }
- }
-
- //dbgs() << "done. (Added MBB#" << splitBlock->getNumber() << ")\n";
-
- return *splitBlock;
- }
-
- LoopSplitter::LoopRanges& LoopSplitter::getLoopRanges(MachineLoop &loop) {
- typedef std::set<MachineBasicBlock*, StartSlotComparator> LoopMBBSet;
- LoopRangeMap::iterator lrItr = loopRangeMap.find(&loop);
- if (lrItr == loopRangeMap.end()) {
- LoopMBBSet loopMBBs((StartSlotComparator(*lis)));
- std::copy(loop.block_begin(), loop.block_end(),
- std::inserter(loopMBBs, loopMBBs.begin()));
-
- assert(!loopMBBs.empty() && "No blocks in loop?");
-
- LoopRanges &loopRanges = loopRangeMap[&loop];
- assert(loopRanges.empty() && "Loop encountered but not processed?");
- SlotIndex oldEnd = lis->getMBBEndIdx(*loopMBBs.begin());
- loopRanges.push_back(
- std::make_pair(lis->getMBBStartIdx(*loopMBBs.begin()),
- lis->getInvalidIndex()));
- for (LoopMBBSet::iterator curBlockItr = llvm::next(loopMBBs.begin()),
- curBlockEnd = loopMBBs.end();
- curBlockItr != curBlockEnd; ++curBlockItr) {
- SlotIndex newStart = lis->getMBBStartIdx(*curBlockItr);
- if (newStart != oldEnd) {
- loopRanges.back().second = oldEnd;
- loopRanges.push_back(std::make_pair(newStart,
- lis->getInvalidIndex()));
- }
- oldEnd = lis->getMBBEndIdx(*curBlockItr);
- }
-
- loopRanges.back().second =
- lis->getMBBEndIdx(*llvm::prior(loopMBBs.end()));
-
- return loopRanges;
- }
- return lrItr->second;
- }
-
- std::pair<bool, LoopSplitter::SlotPair> LoopSplitter::getLoopSubRange(
- const LiveRange &lr,
- MachineLoop &loop) {
- LoopRanges &loopRanges = getLoopRanges(loop);
- LoopRanges::iterator lrItr = loopRanges.begin(),
- lrEnd = loopRanges.end();
- while (lrItr != lrEnd && lr.start >= lrItr->second) {
- ++lrItr;
- }
-
- if (lrItr == lrEnd) {
- SlotIndex invalid = lis->getInvalidIndex();
- return std::make_pair(false, SlotPair(invalid, invalid));
- }
-
- SlotIndex srStart(lr.start < lrItr->first ? lrItr->first : lr.start);
- SlotIndex srEnd(lr.end > lrItr->second ? lrItr->second : lr.end);
-
- return std::make_pair(true, SlotPair(srStart, srEnd));
- }
-
- void LoopSplitter::dumpLoopRanges(MachineLoop &loop) {
- LoopRanges &loopRanges = getLoopRanges(loop);
- dbgs() << "For loop MBB#" << loop.getHeader()->getNumber() << ", subranges are: [ ";
- for (LoopRanges::iterator lrItr = loopRanges.begin(), lrEnd = loopRanges.end();
- lrItr != lrEnd; ++lrItr) {
- dbgs() << "[" << lrItr->first << ", " << lrItr->second << ") ";
- }
- dbgs() << "]\n";
- }
-
- void LoopSplitter::processHeader(LoopSplit &split) {
- MachineBasicBlock &header = *split.getLoop().getHeader();
- //dbgs() << " Processing loop header BB#" << header.getNumber() << "\n";
-
- if (!lis->isLiveInToMBB(split.getLI(), &header))
- return; // Not live in, but nothing wrong so far.
-
- MachineBasicBlock *preHeader = split.getLoop().getLoopPreheader();
- if (!preHeader) {
-
- if (!canInsertPreHeader(split.getLoop())) {
- split.invalidate();
- return; // Couldn't insert a pre-header. Bail on this interval.
- }
-
- for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(),
- predEnd = header.pred_end();
- predItr != predEnd; ++predItr) {
- if (lis->isLiveOutOfMBB(split.getLI(), *predItr)) {
- split.splitIncoming();
- break;
- }
- }
- } else if (lis->isLiveOutOfMBB(split.getLI(), preHeader)) {
- split.splitIncoming();
- }
- }
-
- void LoopSplitter::processLoopExits(LoopSplit &split) {
- typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList;
- ExitEdgesList exitEdges;
- split.getLoop().getExitEdges(exitEdges);
-
- //dbgs() << " Processing loop exits:\n";
-
- for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(),
- exitEdgeEnd = exitEdges.end();
- exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) {
- MachineLoop::Edge exitEdge = *exitEdgeItr;
-
- LiveRange *outRange =
- split.getLI().getLiveRangeContaining(lis->getMBBStartIdx(exitEdge.second));
-
- if (outRange != 0) {
- if (isCriticalEdge(exitEdge) && !canSplitEdge(exitEdge)) {
- split.invalidate();
- return;
- }
-
- split.splitOutgoing(exitEdge);
- }
- }
- }
-
- void LoopSplitter::processLoopUses(LoopSplit &split) {
- std::set<MachineInstr*> processed;
-
- for (MachineRegisterInfo::reg_iterator
- rItr = mri->reg_begin(split.getLI().reg),
- rEnd = mri->reg_end();
- rItr != rEnd; ++rItr) {
- MachineInstr &instr = *rItr;
- if (split.getLoop().contains(&instr) && processed.count(&instr) == 0) {
- split.addLoopInstr(&instr);
- processed.insert(&instr);
- }
- }
-
- //dbgs() << " Rewriting reg" << li.reg << " to reg" << newLI->reg
- // << " in blocks [ ";
- //dbgs() << "]\n";
- }
-
- bool LoopSplitter::splitOverLoop(LiveInterval &li, MachineLoop &loop) {
- assert(TargetRegisterInfo::isVirtualRegister(li.reg) &&
- "Attempt to split physical register.");
-
- LoopSplit split(*this, li, loop);
- processHeader(split);
- if (split.isValid())
- processLoopExits(split);
- if (split.isValid())
- processLoopUses(split);
- if (split.isValid() /* && split.isWorthwhile() */) {
- split.apply();
- DEBUG(dbgs() << "Success.\n");
- return true;
- }
- DEBUG(dbgs() << "Failed.\n");
- return false;
- }
-
- void LoopSplitter::processInterval(LiveInterval &li) {
- std::deque<MachineLoop*> loops;
- std::copy(mli->begin(), mli->end(), std::back_inserter(loops));
-
- while (!loops.empty()) {
- MachineLoop &loop = *loops.front();
- loops.pop_front();
- DEBUG(
- dbgs() << fqn << " reg" << li.reg << " " << li.weight << " BB#"
- << loop.getHeader()->getNumber() << " ";
- );
- if (!splitOverLoop(li, loop)) {
- // Couldn't split over outer loop, schedule sub-loops to be checked.
- std::copy(loop.begin(), loop.end(), std::back_inserter(loops));
- }
- }
- }
-
- void LoopSplitter::processIntervals() {
- while (!intervals.empty()) {
- LiveInterval &li = *intervals.front();
- intervals.pop_front();
-
- assert(!lis->intervalIsInOneMBB(li) &&
- "Single interval in process worklist.");
-
- processInterval(li);
- }
- }
-
-}
diff --git a/lib/CodeGen/Splitter.h b/lib/CodeGen/Splitter.h
deleted file mode 100644
index 9fb1b8b..0000000
--- a/lib/CodeGen/Splitter.h
+++ /dev/null
@@ -1,101 +0,0 @@
-//===-- llvm/CodeGen/Splitter.h - Splitter -*- C++ -*----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_SPLITTER_H
-#define LLVM_CODEGEN_SPLITTER_H
-
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/SlotIndexes.h"
-
-#include <deque>
-#include <map>
-#include <string>
-#include <vector>
-
-namespace llvm {
-
- class LiveInterval;
- class LiveIntervals;
- struct LiveRange;
- class LoopSplit;
- class MachineDominatorTree;
- class MachineRegisterInfo;
- class SlotIndexes;
- class TargetInstrInfo;
- class VNInfo;
-
- class LoopSplitter : public MachineFunctionPass {
- friend class LoopSplit;
- public:
- static char ID;
-
- LoopSplitter() : MachineFunctionPass(ID) {
- initializeLoopSplitterPass(*PassRegistry::getPassRegistry());
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &au) const;
-
- virtual bool runOnMachineFunction(MachineFunction &fn);
-
- virtual void releaseMemory();
-
-
- private:
-
- MachineFunction *mf;
- LiveIntervals *lis;
- MachineLoopInfo *mli;
- MachineRegisterInfo *mri;
- MachineDominatorTree *mdt;
- SlotIndexes *sis;
- const TargetInstrInfo *tii;
- const TargetRegisterInfo *tri;
-
- std::string fqn;
- std::deque<LiveInterval*> intervals;
-
- typedef std::pair<SlotIndex, SlotIndex> SlotPair;
- typedef std::vector<SlotPair> LoopRanges;
- typedef std::map<MachineLoop*, LoopRanges> LoopRangeMap;
- LoopRangeMap loopRangeMap;
-
- void dumpLoopInfo(MachineLoop &loop);
-
- void dumpOddTerminators();
-
- void updateTerminators(MachineBasicBlock &mbb);
-
- bool canInsertPreHeader(MachineLoop &loop);
- MachineBasicBlock& insertPreHeader(MachineLoop &loop);
-
- bool isCriticalEdge(MachineLoop::Edge &edge);
- bool canSplitEdge(MachineLoop::Edge &edge);
- MachineBasicBlock& splitEdge(MachineLoop::Edge &edge, MachineLoop &loop);
-
- LoopRanges& getLoopRanges(MachineLoop &loop);
- std::pair<bool, SlotPair> getLoopSubRange(const LiveRange &lr,
- MachineLoop &loop);
-
- void dumpLoopRanges(MachineLoop &loop);
-
- void processHeader(LoopSplit &split);
- void processLoopExits(LoopSplit &split);
- void processLoopUses(LoopSplit &split);
-
- bool splitOverLoop(LiveInterval &li, MachineLoop &loop);
-
- void processInterval(LiveInterval &li);
-
- void processIntervals();
- };
-
-}
-
-#endif
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 3a6211a..031377b 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -553,7 +553,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
bool HasIndirectbr = false;
if (!TailBB.empty())
- HasIndirectbr = TailBB.back().getDesc().isIndirectBranch();
+ HasIndirectbr = TailBB.back().isIndirectBranch();
if (HasIndirectbr && PreRegAlloc)
MaxDuplicateCount = 20;
@@ -561,22 +561,21 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
// Check the instructions in the block to determine whether tail-duplication
// is invalid or unlikely to be profitable.
unsigned InstrCount = 0;
- for (MachineBasicBlock::const_iterator I = TailBB.begin(); I != TailBB.end();
- ++I) {
+ for (MachineBasicBlock::iterator I = TailBB.begin(); I != TailBB.end(); ++I) {
// Non-duplicable things shouldn't be tail-duplicated.
- if (I->getDesc().isNotDuplicable())
+ if (I->isNotDuplicable())
return false;
// Do not duplicate 'return' instructions if this is a pre-regalloc run.
// A return may expand into a lot more instructions (e.g. reload of callee
// saved registers) after PEI.
- if (PreRegAlloc && I->getDesc().isReturn())
+ if (PreRegAlloc && I->isReturn())
return false;
// Avoid duplicating calls before register allocation. Calls presents a
// barrier to register allocation so duplicating them may end up increasing
// spills.
- if (PreRegAlloc && I->getDesc().isCall())
+ if (PreRegAlloc && I->isCall())
return false;
if (!I->isPHI() && !I->isDebugValue())
@@ -611,7 +610,7 @@ TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) {
++I;
if (I == E)
return true;
- return I->getDesc().isUnconditionalBranch();
+ return I->isUnconditionalBranch();
}
static bool
diff --git a/lib/Target/TargetFrameLowering.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 122f869..cadb878 100644
--- a/lib/Target/TargetFrameLowering.cpp
+++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -1,4 +1,4 @@
-//===----- TargetFrameLowering.cpp - Implement target frame interface ------==//
+//===----- TargetFrameLoweringImpl.cpp - Implement target frame interface --==//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index 08e2b16..7ed9455 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -121,6 +122,9 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI,
unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
+ assert(!MI->isBundle() &&
+ "TargetInstrInfoImpl::findCommutedOpIndices() can't handle bundles");
+
const MCInstrDesc &MCID = MI->getDesc();
if (!MCID.isCommutable())
return false;
@@ -136,11 +140,28 @@ bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI,
}
+bool
+TargetInstrInfoImpl::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ if (!MI->isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (MI->isBranch() && !MI->isBarrier())
+ return true;
+ if (!MI->isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+
bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const {
bool MadeChange = false;
+
+ assert(!MI->isBundle() &&
+ "TargetInstrInfoImpl::PredicateInstruction() can't handle bundles");
+
const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isPredicable())
+ if (!MI->isPredicable())
return false;
for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -218,7 +239,7 @@ TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0,
MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig,
MachineFunction &MF) const {
- assert(!Orig->getDesc().isNotDuplicable() &&
+ assert(!Orig->isNotDuplicable() &&
"Instruction cannot be duplicated");
return MF.CloneMachineInstr(Orig);
}
@@ -288,10 +309,10 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) {
// Add a memory operand, foldMemoryOperandImpl doesn't do that.
assert((!(Flags & MachineMemOperand::MOStore) ||
- NewMI->getDesc().mayStore()) &&
+ NewMI->mayStore()) &&
"Folded a def to a non-store!");
assert((!(Flags & MachineMemOperand::MOLoad) ||
- NewMI->getDesc().mayLoad()) &&
+ NewMI->mayLoad()) &&
"Folded a use to a non-load!");
const MachineFrameInfo &MFI = *MF.getFrameInfo();
assert(MFI.getObjectOffset(FI) != -1);
@@ -331,7 +352,7 @@ MachineInstr*
TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
const SmallVectorImpl<unsigned> &Ops,
MachineInstr* LoadMI) const {
- assert(LoadMI->getDesc().canFoldAsLoad() && "LoadMI isn't foldable!");
+ assert(LoadMI->canFoldAsLoad() && "LoadMI isn't foldable!");
#ifndef NDEBUG
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!");
@@ -382,10 +403,8 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx))
return true;
- const MCInstrDesc &MCID = MI->getDesc();
-
// Avoid instructions obviously unsafe for remat.
- if (MCID.isNotDuplicable() || MCID.mayStore() ||
+ if (MI->isNotDuplicable() || MI->mayStore() ||
MI->hasUnmodeledSideEffects())
return false;
@@ -395,7 +414,7 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
return false;
// Avoid instructions which load from potentially varying memory.
- if (MCID.mayLoad() && !MI->isInvariantLoad(AA))
+ if (MI->mayLoad() && !MI->isInvariantLoad(AA))
return false;
// If any of the registers accessed are non-constant, conservatively assume
@@ -456,7 +475,7 @@ bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI,
const MachineBasicBlock *MBB,
const MachineFunction &MF) const{
// Terminators and labels can't be scheduled around.
- if (MI->getDesc().isTerminator() || MI->isLabel())
+ if (MI->isTerminator() || MI->isLabel())
return true;
// Don't attempt to schedule around any instruction that defines
@@ -492,3 +511,32 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
return (ScheduleHazardRecognizer *)
new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
}
+
+int
+TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return -1;
+
+ if (!DefNode->isMachineOpcode())
+ return -1;
+
+ unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
+ if (!UseNode->isMachineOpcode())
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+ unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass();
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *N) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ if (!N->isMachineOpcode())
+ return 1;
+
+ return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
+}
+
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index c43e5b6..7fe164a 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -536,9 +536,7 @@ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
// Add information about the stub reference to MachOMMI so that the stub
// gets emitted by the asmprinter.
MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
- MachineModuleInfoImpl::StubValueTy &StubSym =
- GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) :
- MachOMMI.getGVStubEntry(SSym);
+ MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
if (StubSym.getPointer() == 0) {
MCSymbol *Sym = Mang->getSymbol(GV);
StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp
new file mode 100644
index 0000000..0f59d01
--- /dev/null
+++ b/lib/CodeGen/TargetOptionsImpl.cpp
@@ -0,0 +1,52 @@
+//===-- TargetOptionsImpl.cpp - Options that apply to all targets ----------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the methods in the TargetOptions class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+/// DisableFramePointerElim - This returns true if frame pointer elimination
+/// optimization should be disabled for the given machine function.
+bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
+ // Check to see if we should eliminate non-leaf frame pointers and then
+ // check to see if we should eliminate all frame pointers.
+ if (NoFramePointerElimNonLeaf && !NoFramePointerElim) {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->hasCalls();
+ }
+
+ return NoFramePointerElim;
+}
+
+/// LessPreciseFPMAD - This flag returns true when the -enable-fp-mad option
+/// is specified on the command line. When this flag is off (default), the
+/// code generator is not allowed to generate mad (multiply add) if the
+/// result is "less precise" than doing those operations individually.
+bool TargetOptions::LessPreciseFPMAD() const {
+ return UnsafeFPMath || LessPreciseFPMADOption;
+}
+
+/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
+/// that the rounding mode of the FPU can change from its default.
+bool TargetOptions::HonorSignDependentRoundingFPMath() const {
+ return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption;
+}
+
+/// getTrapFunctionName - If this returns a non-empty string, this means isel
+/// should lower Intrinsic::trap to a call to the specified function name
+/// instead of an ISD::TRAP node.
+StringRef TargetOptions::getTrapFunctionName() const {
+ return TrapFuncName;
+}
+
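For context (not part of the patch): a minimal sketch of a caller consulting the new per-TargetMachine options; the helper name and the assumption that a MachineFunction is at hand are illustrative only.

  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/Target/TargetOptions.h"

  // Returns true when frame-pointer elimination must stay disabled for MF,
  // i.e. -disable-fp-elim, or the non-leaf variant on a function that makes
  // calls.
  static bool mustKeepFramePointer(const llvm::TargetOptions &Options,
                                   const llvm::MachineFunction &MF) {
    return Options.DisableFramePointerElim(MF);
  }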
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index a2e8134..6a63335 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -242,7 +242,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
// appropriate location, we can try to sink the current instruction
// past it.
if (!KillMI || KillMI->getParent() != MBB || KillMI == MI ||
- KillMI->getDesc().isTerminator())
+ KillMI->isTerminator())
return false;
// If any of the definitions are used by another instruction between the
@@ -498,8 +498,7 @@ MachineInstr *findLocalKill(unsigned Reg, MachineBasicBlock *MBB,
MachineInstr *UseMI = &*UI;
if (UseMI == MI || UseMI->getParent() != MBB)
continue;
- DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
- if (DI != DistanceMap.end())
+ if (DistanceMap.count(UseMI))
continue;
if (!UI.getOperand().isKill())
return 0;
@@ -817,10 +816,9 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
static bool isSafeToDelete(MachineInstr *MI,
const TargetInstrInfo *TII,
SmallVector<unsigned, 4> &Kills) {
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.mayStore() || MCID.isCall())
+ if (MI->mayStore() || MI->isCall())
return false;
- if (MCID.isTerminator() || MI->hasUnmodeledSideEffects())
+ if (MI->isTerminator() || MI->hasUnmodeledSideEffects())
return false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -918,9 +916,8 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
// Don't mess with copies, they may be coalesced later.
return false;
- const MCInstrDesc &MCID = KillMI->getDesc();
- if (MCID.hasUnmodeledSideEffects() || MCID.isCall() || MCID.isBranch() ||
- MCID.isTerminator())
+ if (KillMI->hasUnmodeledSideEffects() || KillMI->isCall() ||
+ KillMI->isBranch() || KillMI->isTerminator())
      // Don't move past calls, etc.
return false;
@@ -975,9 +972,8 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
return false;
++NumVisited;
- const MCInstrDesc &OMCID = OtherMI->getDesc();
- if (OMCID.hasUnmodeledSideEffects() || OMCID.isCall() || OMCID.isBranch() ||
- OMCID.isTerminator())
+ if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() ||
+ OtherMI->isBranch() || OtherMI->isTerminator())
      // Don't move past calls, etc.
return false;
for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
@@ -1119,9 +1115,8 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
return false;
++NumVisited;
- const MCInstrDesc &MCID = OtherMI->getDesc();
- if (MCID.hasUnmodeledSideEffects() || MCID.isCall() || MCID.isBranch() ||
- MCID.isTerminator())
+ if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() ||
+ OtherMI->isBranch() || OtherMI->isTerminator())
      // Don't move past calls, etc.
return false;
SmallVector<unsigned, 2> OtherDefs;
@@ -1201,7 +1196,6 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
return false;
MachineInstr &MI = *mi;
- const MCInstrDesc &MCID = MI.getDesc();
unsigned regA = MI.getOperand(DstIdx).getReg();
unsigned regB = MI.getOperand(SrcIdx).getReg();
@@ -1223,7 +1217,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
unsigned regCIdx = ~0U;
bool TryCommute = false;
bool AggressiveCommute = false;
- if (MCID.isCommutable() && MI.getNumOperands() >= 3 &&
+ if (MI.isCommutable() && MI.getNumOperands() >= 3 &&
TII->findCommutedOpIndices(&MI, SrcOp1, SrcOp2)) {
if (SrcIdx == SrcOp1)
regCIdx = SrcOp2;
@@ -1261,7 +1255,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
if (TargetRegisterInfo::isVirtualRegister(regA))
ScanUses(regA, &*mbbi, Processed);
- if (MCID.isConvertibleTo3Addr()) {
+ if (MI.isConvertibleTo3Addr()) {
// This instruction is potentially convertible to a true
// three-address instruction. Check if it is profitable.
if (!regBKilled || isProfitableToConv3Addr(regA, regB)) {
@@ -1288,7 +1282,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
// movq (%rax), %rcx
// addq %rdx, %rcx
// because it's preferable to schedule a load than a register copy.
- if (MCID.mayLoad() && !regBKilled) {
+ if (MI.mayLoad() && !regBKilled) {
// Determine if a load can be unfolded.
unsigned LoadRegIndex;
unsigned NewOpc =
@@ -1531,7 +1525,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
// If it's safe and profitable, remat the definition instead of
// copying it.
if (DefMI &&
- DefMI->getDesc().isAsCheapAsAMove() &&
+ DefMI->isAsCheapAsAMove() &&
DefMI->isSafeToReMat(TII, AA, regB) &&
isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n");
diff --git a/lib/DebugInfo/CMakeLists.txt b/lib/DebugInfo/CMakeLists.txt
index fdffcb6..441f1e8 100644
--- a/lib/DebugInfo/CMakeLists.txt
+++ b/lib/DebugInfo/CMakeLists.txt
@@ -10,7 +10,3 @@ add_llvm_library(LLVMDebugInfo
DWARFDebugLine.cpp
DWARFFormValue.cpp
)
-
-add_llvm_library_dependencies(LLVMDebugInfo
- LLVMSupport
- )
diff --git a/lib/DebugInfo/LLVMBuild.txt b/lib/DebugInfo/LLVMBuild.txt
index b46d3d2..210b9f9 100644
--- a/lib/DebugInfo/LLVMBuild.txt
+++ b/lib/DebugInfo/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = DebugInfo
parent = Libraries
required_libraries = Support
-
diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt
index fb14d41..58caae8 100644
--- a/lib/ExecutionEngine/CMakeLists.txt
+++ b/lib/ExecutionEngine/CMakeLists.txt
@@ -4,13 +4,6 @@ add_llvm_library(LLVMExecutionEngine
TargetSelect.cpp
)
-add_llvm_library_dependencies(LLVMExecutionEngine
- LLVMCore
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(Interpreter)
add_subdirectory(JIT)
add_subdirectory(MCJIT)
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index 525877b..7829a29 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -28,6 +28,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/Host.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include <cmath>
@@ -41,14 +42,12 @@ ExecutionEngine *(*ExecutionEngine::JITCtor)(
Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM) = 0;
ExecutionEngine *(*ExecutionEngine::MCJITCtor)(
Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM) = 0;
ExecutionEngine *(*ExecutionEngine::InterpCtor)(Module *M,
@@ -420,7 +419,7 @@ ExecutionEngine *ExecutionEngine::create(Module *M,
ExecutionEngine *ExecutionEngine::createJIT(Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
+ CodeGenOpt::Level OL,
bool GVsWithCode,
Reloc::Model RM,
CodeModel::Model CMM) {
@@ -436,11 +435,14 @@ ExecutionEngine *ExecutionEngine::createJIT(Module *M,
StringRef MCPU = "";
SmallVector<std::string, 1> MAttrs;
+ Triple TT(M->getTargetTriple());
+ // TODO: permit custom TargetOptions here
TargetMachine *TM =
- EngineBuilder::selectTarget(M, MArch, MCPU, MAttrs, RM, CMM, ErrorStr);
+ EngineBuilder::selectTarget(TT, MArch, MCPU, MAttrs, TargetOptions(), RM,
+ CMM, OL, ErrorStr);
if (!TM || (ErrorStr && ErrorStr->length() > 0)) return 0;
- return ExecutionEngine::JITCtor(M, ErrorStr, JMM, OptLevel, GVsWithCode, TM);
+ return ExecutionEngine::JITCtor(M, ErrorStr, JMM, GVsWithCode, TM);
}
ExecutionEngine *EngineBuilder::create() {
@@ -465,17 +467,25 @@ ExecutionEngine *EngineBuilder::create() {
// Unless the interpreter was explicitly selected or the JIT is not linked,
// try making a JIT.
if (WhichEngine & EngineKind::JIT) {
- if (TargetMachine *TM = EngineBuilder::selectTarget(M, MArch, MCPU, MAttrs,
+ Triple TT(M->getTargetTriple());
+ if (TargetMachine *TM = EngineBuilder::selectTarget(TT, MArch, MCPU, MAttrs,
+ Options,
RelocModel, CMModel,
- ErrorStr)) {
+ OptLevel, ErrorStr)) {
+ if (!TM->getTarget().hasJIT()) {
+ errs() << "WARNING: This target JIT is not designed for the host"
+ << " you are running. If bad things happen, please choose"
+ << " a different -march switch.\n";
+ }
+
if (UseMCJIT && ExecutionEngine::MCJITCtor) {
ExecutionEngine *EE =
- ExecutionEngine::MCJITCtor(M, ErrorStr, JMM, OptLevel,
+ ExecutionEngine::MCJITCtor(M, ErrorStr, JMM,
AllocateGVsWithCode, TM);
if (EE) return EE;
} else if (ExecutionEngine::JITCtor) {
ExecutionEngine *EE =
- ExecutionEngine::JITCtor(M, ErrorStr, JMM, OptLevel,
+ ExecutionEngine::JITCtor(M, ErrorStr, JMM,
AllocateGVsWithCode, TM);
if (EE) return EE;
}
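For context (not part of the patch): a minimal sketch of the updated createJIT entry point, assuming a Module *M already exists and the relocation and code-model parameters keep their declared defaults; error handling is abbreviated.

  std::string Err;
  // The optimization level now flows into the TargetMachine built inside
  // createJIT (via selectTarget) instead of into the JIT constructor.
  llvm::ExecutionEngine *EE =
      llvm::ExecutionEngine::createJIT(M, &Err, /*JMM=*/0,
                                       llvm::CodeGenOpt::Default,
                                       /*GVsWithCode=*/false);
  if (!EE)
    llvm::errs() << "createJIT failed: " << Err << "\n";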
diff --git a/lib/ExecutionEngine/Interpreter/CMakeLists.txt b/lib/ExecutionEngine/Interpreter/CMakeLists.txt
index 4fb58c2..d331f83 100644
--- a/lib/ExecutionEngine/Interpreter/CMakeLists.txt
+++ b/lib/ExecutionEngine/Interpreter/CMakeLists.txt
@@ -12,14 +12,6 @@ add_llvm_library(LLVMInterpreter
Interpreter.cpp
)
-add_llvm_library_dependencies(LLVMInterpreter
- LLVMCodeGen
- LLVMCore
- LLVMExecutionEngine
- LLVMSupport
- LLVMTarget
- )
-
if( LLVM_ENABLE_FFI )
target_link_libraries( LLVMInterpreter ${FFI_LIBRARY_PATH} )
endif()
diff --git a/lib/ExecutionEngine/Interpreter/LLVMBuild.txt b/lib/ExecutionEngine/Interpreter/LLVMBuild.txt
index 459426d..327b320 100644
--- a/lib/ExecutionEngine/Interpreter/LLVMBuild.txt
+++ b/lib/ExecutionEngine/Interpreter/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = Interpreter
parent = ExecutionEngine
required_libraries = CodeGen Core ExecutionEngine Support Target
-
diff --git a/lib/ExecutionEngine/JIT/CMakeLists.txt b/lib/ExecutionEngine/JIT/CMakeLists.txt
index 813ccce..cefb0ae 100644
--- a/lib/ExecutionEngine/JIT/CMakeLists.txt
+++ b/lib/ExecutionEngine/JIT/CMakeLists.txt
@@ -10,13 +10,3 @@ add_llvm_library(LLVMJIT
JITMemoryManager.cpp
OProfileJITEventListener.cpp
)
-
-add_llvm_library_dependencies(LLVMJIT
- LLVMCodeGen
- LLVMCore
- LLVMExecutionEngine
- LLVMMC
- LLVMRuntimeDyld
- LLVMSupport
- LLVMTarget
- )
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index d773009..e4f6bc4 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -206,7 +206,6 @@ void DarwinRegisterFrame(void* FrameBegin) {
ExecutionEngine *JIT::createJIT(Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM) {
// Try to register the program as a source of symbols to resolve against.
@@ -216,7 +215,7 @@ ExecutionEngine *JIT::createJIT(Module *M,
// If the target supports JIT code generation, create the JIT.
if (TargetJITInfo *TJ = TM->getJITInfo()) {
- return new JIT(M, *TM, *TJ, JMM, OptLevel, GVsWithCode);
+ return new JIT(M, *TM, *TJ, JMM, GVsWithCode);
} else {
if (ErrorStr)
*ErrorStr = "target does not support JIT code generation";
@@ -268,7 +267,7 @@ extern "C" {
}
JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
- JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, bool GVsWithCode)
+ JITMemoryManager *JMM, bool GVsWithCode)
: ExecutionEngine(M), TM(tm), TJI(tji), AllocateGVsWithCode(GVsWithCode),
isAlreadyCodeGenerating(false) {
setTargetData(TM.getTargetData());
@@ -288,7 +287,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
// Turn the machine code intermediate representation into bytes in memory that
// may be executed.
- if (TM.addPassesToEmitMachineCode(PM, *JCE, OptLevel)) {
+ if (TM.addPassesToEmitMachineCode(PM, *JCE)) {
report_fatal_error("Target does not support machine code emission!");
}
@@ -341,7 +340,7 @@ void JIT::addModule(Module *M) {
// Turn the machine code intermediate representation into bytes in memory
// that may be executed.
- if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
+ if (TM.addPassesToEmitMachineCode(PM, *JCE)) {
report_fatal_error("Target does not support machine code emission!");
}
@@ -372,7 +371,7 @@ bool JIT::removeModule(Module *M) {
// Turn the machine code intermediate representation into bytes in memory
// that may be executed.
- if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
+ if (TM.addPassesToEmitMachineCode(PM, *JCE)) {
report_fatal_error("Target does not support machine code emission!");
}
diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h
index 92dcb0e..fbb9416 100644
--- a/lib/ExecutionEngine/JIT/JIT.h
+++ b/lib/ExecutionEngine/JIT/JIT.h
@@ -78,8 +78,7 @@ class JIT : public ExecutionEngine {
JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
- JITMemoryManager *JMM, CodeGenOpt::Level OptLevel,
- bool AllocateGVsWithCode);
+ JITMemoryManager *JMM, bool AllocateGVsWithCode);
public:
~JIT();
@@ -185,7 +184,6 @@ public:
static ExecutionEngine *createJIT(Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM);
diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
index 2e90968..abb70fb 100644
--- a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
+++ b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
@@ -115,7 +115,7 @@ std::string JITDebugRegisterer::MakeELF(const Function *F, DebugInfo &I) {
// When trying to debug why GDB isn't getting the debug info right, it's
// awfully helpful to write the object file to disk so that it can be
// inspected with readelf and objdump.
- if (JITEmitDebugInfoToDisk) {
+ if (TM.Options.JITEmitDebugInfoToDisk) {
std::string Filename;
raw_string_ostream O2(Filename);
O2 << "/tmp/llvm_function_" << I.FnStart << "_" << F->getName() << ".o";
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
index 8f84ac7..42a136e 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -313,7 +313,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
MI != E; ++MI) {
if (!MI->isLabel()) {
- MayThrow |= MI->getDesc().isCall();
+ MayThrow |= MI->isCall();
continue;
}
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 24020ee..d9fa509 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -362,10 +362,16 @@ namespace {
/// Instance of the JIT
JIT *TheJIT;
+ bool JITExceptionHandling;
+
+ bool JITEmitDebugInfo;
+
public:
JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM)
: SizeEstimate(0), Resolver(jit, *this), MMI(0), CurFn(0),
- EmittedFunctions(this), TheJIT(&jit) {
+ EmittedFunctions(this), TheJIT(&jit),
+ JITExceptionHandling(TM.Options.JITExceptionHandling),
+ JITEmitDebugInfo(TM.Options.JITEmitDebugInfo) {
MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager();
if (jit.getJITInfo().needsGOT()) {
MemMgr->AllocateGOT();
@@ -1037,7 +1043,7 @@ void JITEmitter::deallocateMemForFunction(const Function *F) {
EmittedFunctions.erase(Emitted);
}
- if(JITExceptionHandling) {
+ if (JITExceptionHandling) {
TheJIT->DeregisterTable(F);
}
@@ -1047,7 +1053,7 @@ void JITEmitter::deallocateMemForFunction(const Function *F) {
}
-void* JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) {
+void *JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) {
if (BufferBegin)
return JITCodeEmitter::allocateSpace(Size, Alignment);
@@ -1059,7 +1065,7 @@ void* JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) {
return CurBufferPtr;
}
-void* JITEmitter::allocateGlobal(uintptr_t Size, unsigned Alignment) {
+void *JITEmitter::allocateGlobal(uintptr_t Size, unsigned Alignment) {
// Delegate this call through the memory manager.
return MemMgr->allocateGlobal(Size, Alignment);
}
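For context (not part of the patch): the emitter now reads these flags from the TargetMachine's options rather than from global command-line variables. A minimal sketch of populating them up front; only the field names are taken from the code above, everything else is illustrative.

  llvm::TargetOptions Opts;
  // Consulted by JITEmitter/JITDebugRegisterer through TM.Options.
  Opts.JITExceptionHandling = true;    // register EH frames with the runtime
  Opts.JITEmitDebugInfo = true;        // make JITed code visible to GDB
  Opts.JITEmitDebugInfoToDisk = false; // don't also write object files to disk
  // Opts would then be handed to EngineBuilder::selectTarget (see below) so
  // that the resulting TargetMachine carries it into the JIT components.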
diff --git a/lib/ExecutionEngine/JIT/LLVMBuild.txt b/lib/ExecutionEngine/JIT/LLVMBuild.txt
index 21cb300..ca2a565 100644
--- a/lib/ExecutionEngine/JIT/LLVMBuild.txt
+++ b/lib/ExecutionEngine/JIT/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = JIT
parent = ExecutionEngine
required_libraries = CodeGen Core ExecutionEngine MC RuntimeDyld Support Target
-
diff --git a/lib/ExecutionEngine/LLVMBuild.txt b/lib/ExecutionEngine/LLVMBuild.txt
index 1ef6a44..d426969 100644
--- a/lib/ExecutionEngine/LLVMBuild.txt
+++ b/lib/ExecutionEngine/LLVMBuild.txt
@@ -15,9 +15,11 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = Interpreter JIT MCJIT RuntimeDyld
+
[component_0]
type = Library
name = ExecutionEngine
parent = Libraries
required_libraries = Core MC Support Target
-
diff --git a/lib/ExecutionEngine/MCJIT/CMakeLists.txt b/lib/ExecutionEngine/MCJIT/CMakeLists.txt
index aae8a1b..38fdffa 100644
--- a/lib/ExecutionEngine/MCJIT/CMakeLists.txt
+++ b/lib/ExecutionEngine/MCJIT/CMakeLists.txt
@@ -2,11 +2,3 @@ add_llvm_library(LLVMMCJIT
MCJIT.cpp
Intercept.cpp
)
-
-add_llvm_library_dependencies(LLVMMCJIT
- LLVMCore
- LLVMExecutionEngine
- LLVMRuntimeDyld
- LLVMSupport
- LLVMTarget
- )
diff --git a/lib/ExecutionEngine/MCJIT/LLVMBuild.txt b/lib/ExecutionEngine/MCJIT/LLVMBuild.txt
index 9b08d3b..90f4d2f 100644
--- a/lib/ExecutionEngine/MCJIT/LLVMBuild.txt
+++ b/lib/ExecutionEngine/MCJIT/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = MCJIT
parent = ExecutionEngine
required_libraries = Core ExecutionEngine RuntimeDyld Support Target
-
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index d5f407d..d5aaec9 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -36,7 +36,6 @@ extern "C" void LLVMLinkInMCJIT() {
ExecutionEngine *MCJIT::createJIT(Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM) {
// Try to register the program as a source of symbols to resolve against.
@@ -46,8 +45,7 @@ ExecutionEngine *MCJIT::createJIT(Module *M,
// If the target supports JIT code generation, create the JIT.
if (TargetJITInfo *TJ = TM->getJITInfo())
- return new MCJIT(M, TM, *TJ, new MCJITMemoryManager(JMM, M), OptLevel,
- GVsWithCode);
+ return new MCJIT(M, TM, *TJ, new MCJITMemoryManager(JMM, M), GVsWithCode);
if (ErrorStr)
*ErrorStr = "target does not support JIT code generation";
@@ -55,8 +53,7 @@ ExecutionEngine *MCJIT::createJIT(Module *M,
}
MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji,
- RTDyldMemoryManager *MM, CodeGenOpt::Level OptLevel,
- bool AllocateGVsWithCode)
+ RTDyldMemoryManager *MM, bool AllocateGVsWithCode)
: ExecutionEngine(m), TM(tm), MemMgr(MM), M(m), OS(Buffer), Dyld(MM) {
setTargetData(TM->getTargetData());
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h
index b64c21a..2a98fc9 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -24,8 +24,7 @@ namespace llvm {
class MCJIT : public ExecutionEngine {
MCJIT(Module *M, TargetMachine *tm, TargetJITInfo &tji,
- RTDyldMemoryManager *MemMgr, CodeGenOpt::Level OptLevel,
- bool AllocateGVsWithCode);
+ RTDyldMemoryManager *MemMgr, bool AllocateGVsWithCode);
TargetMachine *TM;
MCContext *Ctx;
@@ -79,7 +78,6 @@ public:
static ExecutionEngine *createJIT(Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM);
diff --git a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
index c236d1d..59bdfee3 100644
--- a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
+++ b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
@@ -2,8 +2,3 @@ add_llvm_library(LLVMRuntimeDyld
RuntimeDyld.cpp
RuntimeDyldMachO.cpp
)
-
-add_llvm_library_dependencies(LLVMRuntimeDyld
- LLVMObject
- LLVMSupport
- )
diff --git a/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt b/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt
index 5e39814..97dc861 100644
--- a/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt
+++ b/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = RuntimeDyld
parent = ExecutionEngine
required_libraries = Object Support
-
diff --git a/lib/ExecutionEngine/TargetSelect.cpp b/lib/ExecutionEngine/TargetSelect.cpp
index 45480a6..3937fe5 100644
--- a/lib/ExecutionEngine/TargetSelect.cpp
+++ b/lib/ExecutionEngine/TargetSelect.cpp
@@ -7,33 +7,35 @@
//
//===----------------------------------------------------------------------===//
//
-// This just asks the TargetRegistry for the appropriate JIT to use, and allows
-// the user to specify a specific one on the commandline with -march=x. Clients
-// should initialize targets prior to calling createJIT.
+// This just asks the TargetRegistry for the appropriate target to use, and
+// allows the user to specify a specific one on the commandline with -march=x,
+// -mcpu=y, and -mattr=a,-b,+c. Clients should initialize targets prior to
+// calling selectTarget().
//
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/Module.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
/// selectTarget - Pick a target either via -march or by guessing the native
/// arch. Add any CPU features specified via -mcpu or -mattr.
-TargetMachine *EngineBuilder::selectTarget(Module *Mod,
+TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple,
StringRef MArch,
StringRef MCPU,
const SmallVectorImpl<std::string>& MAttrs,
+ const TargetOptions &Options,
Reloc::Model RM,
CodeModel::Model CM,
+ CodeGenOpt::Level OL,
std::string *ErrorStr) {
- Triple TheTriple(Mod->getTargetTriple());
+ Triple TheTriple(TargetTriple);
if (TheTriple.getTriple().empty())
TheTriple.setTriple(sys::getDefaultTargetTriple());
@@ -55,7 +57,7 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod,
}
// Adjust the triple to match (if known), otherwise stick with the
- // module/host triple.
+ // requested/host triple.
Triple::ArchType Type = Triple::getArchTypeForLLVMName(MArch);
if (Type != Triple::UnknownArch)
TheTriple.setArch(Type);
@@ -69,12 +71,6 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod,
}
}
- if (!TheTarget->hasJIT()) {
- errs() << "WARNING: This target JIT is not designed for the host you are"
- << " running. If bad things happen, please choose a different "
- << "-march switch.\n";
- }
-
// Package up features to be passed to target/subtarget
std::string FeaturesStr;
if (!MAttrs.empty()) {
@@ -87,7 +83,8 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod,
// Allocate a target...
TargetMachine *Target = TheTarget->createTargetMachine(TheTriple.getTriple(),
MCPU, FeaturesStr,
- RM, CM);
+ Options,
+ RM, CM, OL);
assert(Target && "Could not allocate target machine!");
return Target;
}
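For context (not part of the patch): a minimal sketch of calling the reworked selectTarget directly, mirroring the call made from ExecutionEngine::createJIT above; the chosen relocation model, code model, and optimization level are illustrative, and includes are abbreviated.

  #include "llvm/ADT/Triple.h"
  #include "llvm/ExecutionEngine/ExecutionEngine.h"
  #include "llvm/Support/Host.h"

  llvm::Triple TT(llvm::sys::getDefaultTargetTriple());
  llvm::SmallVector<std::string, 1> MAttrs;
  std::string Err;
  // TargetOptions and the optimization level are now forwarded to
  // Target::createTargetMachine instead of being read from global flags.
  llvm::TargetMachine *TM =
      llvm::EngineBuilder::selectTarget(TT, /*MArch=*/"", /*MCPU=*/"", MAttrs,
                                        llvm::TargetOptions(),
                                        llvm::Reloc::Default,
                                        llvm::CodeModel::Default,
                                        llvm::CodeGenOpt::Default, &Err);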
diff --git a/lib/LLVMBuild.txt b/lib/LLVMBuild.txt
index c3fa1ff..e22b8cd 100644
--- a/lib/LLVMBuild.txt
+++ b/lib/LLVMBuild.txt
@@ -15,8 +15,10 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = Analysis Archive AsmParser Bitcode CodeGen DebugInfo ExecutionEngine Linker MC Object Support TableGen Target Transforms VMCore
+
[component_0]
type = Group
name = Libraries
parent = $ROOT
-
diff --git a/lib/Linker/CMakeLists.txt b/lib/Linker/CMakeLists.txt
index 4d8824b..0b6d2f4 100644
--- a/lib/Linker/CMakeLists.txt
+++ b/lib/Linker/CMakeLists.txt
@@ -4,11 +4,3 @@ add_llvm_library(LLVMLinker
LinkModules.cpp
Linker.cpp
)
-
-add_llvm_library_dependencies(LLVMLinker
- LLVMArchive
- LLVMBitReader
- LLVMCore
- LLVMSupport
- LLVMTransformUtils
- )
diff --git a/lib/Linker/LLVMBuild.txt b/lib/Linker/LLVMBuild.txt
index 69f2ac4..2b4c232 100644
--- a/lib/Linker/LLVMBuild.txt
+++ b/lib/Linker/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = Linker
parent = Libraries
required_libraries = Archive BitReader Core Support TransformUtils
-
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index a4ac1bf..b2e62a5 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -45,10 +45,5 @@ add_llvm_library(LLVMMC
WinCOFFStreamer.cpp
)
-add_llvm_library_dependencies(LLVMMC
- LLVMObject
- LLVMSupport
- )
-
add_subdirectory(MCParser)
add_subdirectory(MCDisassembler)
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index bd28069..92aad94 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -182,7 +182,7 @@ uint64_t ELFObjectWriter::SymbolValue(MCSymbolData &Data,
if (const MCExpr *Value = Symbol.getVariableValue()) {
int64_t IntValue;
if (Value->EvaluateAsAbsolute(IntValue, Layout))
- return (uint64_t)IntValue;
+ return (uint64_t)IntValue;
}
}
@@ -1072,7 +1072,7 @@ void ELFObjectWriter::WriteDataSectionData(MCAssembler &Asm,
WriteBytes(cast<MCDataFragment>(F).getContents().str());
}
} else {
- Asm.WriteSectionData(&SD, Layout);
+ Asm.writeSectionData(&SD, Layout);
}
}
@@ -1742,14 +1742,26 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
}
} else {
if (IsPCRel) {
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_386_PC32;
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+
+ case X86::reloc_global_offset_table:
+ Type = ELF::R_386_GOTPC;
break;
- case MCSymbolRefExpr::VK_PLT:
- Type = ELF::R_386_PLT32;
+
+ case X86::reloc_signed_4byte:
+ case FK_PCRel_4:
+ case FK_Data_4:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_386_PC32;
+ break;
+ case MCSymbolRefExpr::VK_PLT:
+ Type = ELF::R_386_PLT32;
+ break;
+ }
break;
}
} else {
@@ -1831,6 +1843,21 @@ void MipsELFObjectWriter::WriteEFlags() {
ELF::EF_MIPS_ARCH_32R2);
}
+const MCSymbol *MipsELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ assert(Target.getSymA() && "SymA cannot be 0.");
+ const MCSymbol &Sym = Target.getSymA()->getSymbol();
+
+ if (Sym.getSection().getKind().isMergeableCString() ||
+ Sym.getSection().getKind().isMergeableConst())
+ return &Sym;
+
+ return NULL;
+}
+
unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel,
@@ -1858,7 +1885,8 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
case Mips::fixup_Mips_CALL16:
Type = ELF::R_MIPS_CALL16;
break;
- case Mips::fixup_Mips_GOT16:
+ case Mips::fixup_Mips_GOT_Global:
+ case Mips::fixup_Mips_GOT_Local:
Type = ELF::R_MIPS_GOT16;
break;
case Mips::fixup_Mips_HI16:
@@ -1887,4 +1915,3 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
return Type;
}
-
diff --git a/lib/MC/ELFObjectWriter.h b/lib/MC/ELFObjectWriter.h
index 7838206..9adf0b1 100644
--- a/lib/MC/ELFObjectWriter.h
+++ b/lib/MC/ELFObjectWriter.h
@@ -445,6 +445,12 @@ class ELFObjectWriter : public MCObjectWriter {
virtual void WriteEFlags();
protected:
+ virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
+
virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel, bool IsRelocWithSymbol,
int64_t Addend);
diff --git a/lib/MC/LLVMBuild.txt b/lib/MC/LLVMBuild.txt
index 8ad66b6..f35dbe4 100644
--- a/lib/MC/LLVMBuild.txt
+++ b/lib/MC/LLVMBuild.txt
@@ -15,9 +15,11 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = MCDisassembler MCParser
+
[component_0]
type = Library
name = MC
parent = Libraries
required_libraries = Object Support
-
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index c330e74..b1e1bdf 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -29,7 +29,6 @@ MCAsmInfo::MCAsmInfo() {
HasSubsectionsViaSymbols = false;
HasMachoZeroFillDirective = false;
HasMachoTBSSDirective = false;
- StructorOutputOrder = Structors::ReversePriorityOrder;
HasStaticCtorDtorReferenceInStaticMode = false;
LinkerRequiresNonEmptyDwarfLines = false;
MaxInstLength = 4;
diff --git a/lib/MC/MCAsmInfoCOFF.cpp b/lib/MC/MCAsmInfoCOFF.cpp
index 434d910..6d34801 100644
--- a/lib/MC/MCAsmInfoCOFF.cpp
+++ b/lib/MC/MCAsmInfoCOFF.cpp
@@ -38,3 +38,11 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() {
SupportsDataRegions = false;
}
+
+MCAsmInfoMicrosoft::MCAsmInfoMicrosoft() {
+ AllowQuotesInName = true;
+}
+
+MCAsmInfoGNUCOFF::MCAsmInfoGNUCOFF() {
+}
diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp
index 537d0a3..24f1243 100644
--- a/lib/MC/MCAsmInfoDarwin.cpp
+++ b/lib/MC/MCAsmInfoDarwin.cpp
@@ -39,7 +39,6 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
ZeroDirective = "\t.space\t"; // ".space N" emits N zeros.
HasMachoZeroFillDirective = true; // Uses .zerofill
HasMachoTBSSDirective = true; // Uses .tbss
- StructorOutputOrder = Structors::PriorityOrder;
HasStaticCtorDtorReferenceInStaticMode = true;
CodeBegin = "L$start$code$";
@@ -57,8 +56,9 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
HiddenVisibilityAttr = MCSA_PrivateExtern;
HiddenDeclarationVisibilityAttr = MCSA_Invalid;
+
// Doesn't support protected visibility.
- ProtectedVisibilityAttr = MCSA_Global;
+ ProtectedVisibilityAttr = MCSA_Invalid;
HasDotTypeDotSizeDirective = false;
HasNoDeadStrip = true;
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index d90f7b2..c785c03 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -1284,6 +1284,10 @@ void MCAsmStreamer::Finish() {
if (getContext().hasDwarfFiles() && !UseLoc)
MCDwarfFileTable::Emit(this);
+  // If we are generating dwarf for assembly source files, dump out the sections.
+ if (getContext().getGenDwarfForAssembly())
+ MCGenDwarfInfo::Emit(this);
+
if (!UseCFI)
EmitFrames(false);
}
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 29adbcb..c5bf6b9 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -33,7 +33,7 @@ using namespace llvm;
namespace {
namespace stats {
STATISTIC(EmittedFragments, "Number of emitted assembler fragments");
-STATISTIC(EvaluateFixup, "Number of evaluated fixups");
+STATISTIC(evaluateFixup, "Number of evaluated fixups");
STATISTIC(FragmentLayouts, "Number of fragment layouts");
STATISTIC(ObjectBytes, "Number of emitted object file bytes");
STATISTIC(RelaxationSteps, "Number of assembler layout and relaxation steps");
@@ -136,7 +136,7 @@ uint64_t MCAsmLayout::getSymbolOffset(const MCSymbolData *SD) const {
uint64_t MCAsmLayout::getSectionAddressSize(const MCSectionData *SD) const {
// The size is the last fragment's end offset.
const MCFragment &F = SD->getFragmentList().back();
- return getFragmentOffset(&F) + getAssembler().ComputeFragmentSize(*this, F);
+ return getFragmentOffset(&F) + getAssembler().computeFragmentSize(*this, F);
}
uint64_t MCAsmLayout::getSectionFileSize(const MCSectionData *SD) const {
@@ -237,10 +237,10 @@ const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const {
return SD->getFragment()->getAtom();
}
-bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout,
+bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
const MCFixup &Fixup, const MCFragment *DF,
MCValue &Target, uint64_t &Value) const {
- ++stats::EvaluateFixup;
+ ++stats::evaluateFixup;
if (!Fixup.getValue()->EvaluateAsRelocatable(Target, Layout))
report_fatal_error("expected relocatable expression");
@@ -312,7 +312,7 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout,
return IsResolved;
}
-uint64_t MCAssembler::ComputeFragmentSize(const MCAsmLayout &Layout,
+uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
const MCFragment &F) const {
switch (F.getKind()) {
case MCFragment::FT_Data:
@@ -374,7 +374,7 @@ void MCAsmLayout::LayoutFragment(MCFragment *F) {
// Compute fragment offset and size.
uint64_t Offset = 0;
if (Prev)
- Offset += Prev->Offset + getAssembler().ComputeFragmentSize(*this, *Prev);
+ Offset += Prev->Offset + getAssembler().computeFragmentSize(*this, *Prev);
F->Offset = Offset;
LastValidFragment[F->getParent()] = F;
@@ -390,7 +390,7 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
++stats::EmittedFragments;
// FIXME: Embed in fragments instead?
- uint64_t FragmentSize = Asm.ComputeFragmentSize(Layout, F);
+ uint64_t FragmentSize = Asm.computeFragmentSize(Layout, F);
switch (F.getKind()) {
case MCFragment::FT_Align: {
MCAlignFragment &AF = cast<MCAlignFragment>(F);
@@ -493,7 +493,7 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
assert(OW->getStream().tell() - Start == FragmentSize);
}
-void MCAssembler::WriteSectionData(const MCSectionData *SD,
+void MCAssembler::writeSectionData(const MCSectionData *SD,
const MCAsmLayout &Layout) const {
// Ignore virtual sections.
if (SD->getSection().isVirtualSection()) {
@@ -546,13 +546,13 @@ void MCAssembler::WriteSectionData(const MCSectionData *SD,
}
-uint64_t MCAssembler::HandleFixup(const MCAsmLayout &Layout,
+uint64_t MCAssembler::handleFixup(const MCAsmLayout &Layout,
MCFragment &F,
const MCFixup &Fixup) {
// Evaluate the fixup.
MCValue Target;
uint64_t FixedValue;
- if (!EvaluateFixup(Layout, Fixup, &F, Target, FixedValue)) {
+ if (!evaluateFixup(Layout, Fixup, &F, Target, FixedValue)) {
// The fixup was unresolved, we need a relocation. Inform the object
// writer of the relocation, and give it an opportunity to adjust the
// fixup value if need be.
@@ -592,7 +592,7 @@ void MCAssembler::Finish() {
}
// Layout until everything fits.
- while (LayoutOnce(Layout))
+ while (layoutOnce(Layout))
continue;
DEBUG_WITH_TYPE("mc-dump", {
@@ -600,7 +600,7 @@ void MCAssembler::Finish() {
dump(); });
// Finalize the layout, including fragment lowering.
- FinishLayout(Layout);
+ finishLayout(Layout);
DEBUG_WITH_TYPE("mc-dump", {
llvm::errs() << "assembler backend - final-layout\n--\n";
@@ -621,7 +621,7 @@ void MCAssembler::Finish() {
for (MCDataFragment::fixup_iterator it3 = DF->fixup_begin(),
ie3 = DF->fixup_end(); it3 != ie3; ++it3) {
MCFixup &Fixup = *it3;
- uint64_t FixedValue = HandleFixup(Layout, *DF, Fixup);
+ uint64_t FixedValue = handleFixup(Layout, *DF, Fixup);
getBackend().ApplyFixup(Fixup, DF->getContents().data(),
DF->getContents().size(), FixedValue);
}
@@ -631,7 +631,7 @@ void MCAssembler::Finish() {
for (MCInstFragment::fixup_iterator it3 = IF->fixup_begin(),
ie3 = IF->fixup_end(); it3 != ie3; ++it3) {
MCFixup &Fixup = *it3;
- uint64_t FixedValue = HandleFixup(Layout, *IF, Fixup);
+ uint64_t FixedValue = handleFixup(Layout, *IF, Fixup);
getBackend().ApplyFixup(Fixup, IF->getCode().data(),
IF->getCode().size(), FixedValue);
}
@@ -645,8 +645,8 @@ void MCAssembler::Finish() {
stats::ObjectBytes += OS.tell() - StartOffset;
}
-bool MCAssembler::FixupNeedsRelaxation(const MCFixup &Fixup,
- const MCFragment *DF,
+bool MCAssembler::fixupNeedsRelaxation(const MCFixup &Fixup,
+ const MCInstFragment *DF,
const MCAsmLayout &Layout) const {
if (getRelaxAll())
return true;
@@ -654,16 +654,13 @@ bool MCAssembler::FixupNeedsRelaxation(const MCFixup &Fixup,
// If we cannot resolve the fixup value, it requires relaxation.
MCValue Target;
uint64_t Value;
- if (!EvaluateFixup(Layout, Fixup, DF, Target, Value))
+ if (!evaluateFixup(Layout, Fixup, DF, Target, Value))
return true;
- // Otherwise, relax if the value is too big for a (signed) i8.
- //
- // FIXME: This is target dependent!
- return int64_t(Value) != int64_t(int8_t(Value));
+ return getBackend().fixupNeedsRelaxation(Fixup, Value, DF, Layout);
}
-bool MCAssembler::FragmentNeedsRelaxation(const MCInstFragment *IF,
+bool MCAssembler::fragmentNeedsRelaxation(const MCInstFragment *IF,
const MCAsmLayout &Layout) const {
// If this inst doesn't ever need relaxation, ignore it. This occurs when we
// are intentionally pushing out inst fragments, or because we relaxed a
@@ -673,15 +670,15 @@ bool MCAssembler::FragmentNeedsRelaxation(const MCInstFragment *IF,
for (MCInstFragment::const_fixup_iterator it = IF->fixup_begin(),
ie = IF->fixup_end(); it != ie; ++it)
- if (FixupNeedsRelaxation(*it, IF, Layout))
+ if (fixupNeedsRelaxation(*it, IF, Layout))
return true;
return false;
}
-bool MCAssembler::RelaxInstruction(MCAsmLayout &Layout,
+bool MCAssembler::relaxInstruction(MCAsmLayout &Layout,
MCInstFragment &IF) {
- if (!FragmentNeedsRelaxation(&IF, Layout))
+ if (!fragmentNeedsRelaxation(&IF, Layout))
return false;
++stats::RelaxedInstructions;
@@ -715,7 +712,7 @@ bool MCAssembler::RelaxInstruction(MCAsmLayout &Layout,
return true;
}
-bool MCAssembler::RelaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
+bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
int64_t Value = 0;
uint64_t OldSize = LF.getContents().size();
bool IsAbs = LF.getValue().EvaluateAsAbsolute(Value, Layout);
@@ -732,8 +729,8 @@ bool MCAssembler::RelaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
return OldSize != LF.getContents().size();
}
-bool MCAssembler::RelaxDwarfLineAddr(MCAsmLayout &Layout,
- MCDwarfLineAddrFragment &DF) {
+bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout,
+ MCDwarfLineAddrFragment &DF) {
int64_t AddrDelta = 0;
uint64_t OldSize = DF.getContents().size();
bool IsAbs = DF.getAddrDelta().EvaluateAsAbsolute(AddrDelta, Layout);
@@ -749,7 +746,7 @@ bool MCAssembler::RelaxDwarfLineAddr(MCAsmLayout &Layout,
return OldSize != Data.size();
}
-bool MCAssembler::RelaxDwarfCallFrameFragment(MCAsmLayout &Layout,
+bool MCAssembler::relaxDwarfCallFrameFragment(MCAsmLayout &Layout,
MCDwarfCallFrameFragment &DF) {
int64_t AddrDelta = 0;
uint64_t OldSize = DF.getContents().size();
@@ -764,7 +761,7 @@ bool MCAssembler::RelaxDwarfCallFrameFragment(MCAsmLayout &Layout,
return OldSize != Data.size();
}
-bool MCAssembler::LayoutSectionOnce(MCAsmLayout &Layout,
+bool MCAssembler::layoutSectionOnce(MCAsmLayout &Layout,
MCSectionData &SD) {
MCFragment *FirstInvalidFragment = NULL;
// Scan for fragments that need relaxation.
@@ -776,19 +773,19 @@ bool MCAssembler::LayoutSectionOnce(MCAsmLayout &Layout,
default:
break;
case MCFragment::FT_Inst:
- relaxedFrag = RelaxInstruction(Layout, *cast<MCInstFragment>(it2));
+ relaxedFrag = relaxInstruction(Layout, *cast<MCInstFragment>(it2));
break;
case MCFragment::FT_Dwarf:
- relaxedFrag = RelaxDwarfLineAddr(Layout,
+ relaxedFrag = relaxDwarfLineAddr(Layout,
*cast<MCDwarfLineAddrFragment>(it2));
break;
case MCFragment::FT_DwarfFrame:
relaxedFrag =
- RelaxDwarfCallFrameFragment(Layout,
+ relaxDwarfCallFrameFragment(Layout,
*cast<MCDwarfCallFrameFragment>(it2));
break;
case MCFragment::FT_LEB:
- relaxedFrag = RelaxLEB(Layout, *cast<MCLEBFragment>(it2));
+ relaxedFrag = relaxLEB(Layout, *cast<MCLEBFragment>(it2));
break;
}
// Update the layout, and remember that we relaxed.
@@ -802,20 +799,20 @@ bool MCAssembler::LayoutSectionOnce(MCAsmLayout &Layout,
return false;
}
-bool MCAssembler::LayoutOnce(MCAsmLayout &Layout) {
+bool MCAssembler::layoutOnce(MCAsmLayout &Layout) {
++stats::RelaxationSteps;
bool WasRelaxed = false;
for (iterator it = begin(), ie = end(); it != ie; ++it) {
MCSectionData &SD = *it;
- while(LayoutSectionOnce(Layout, SD))
+ while(layoutSectionOnce(Layout, SD))
WasRelaxed = true;
}
return WasRelaxed;
}
-void MCAssembler::FinishLayout(MCAsmLayout &Layout) {
+void MCAssembler::finishLayout(MCAsmLayout &Layout) {
// The layout is done. Mark every fragment as valid.
for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
Layout.getFragmentOffset(&*Layout.getSectionOrder()[i]->rbegin());
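For context (not part of the patch): fixupNeedsRelaxation now delegates the size check to the backend. A minimal sketch of what a hypothetical backend's override might look like, reproducing the signed-i8 test removed from MCAssembler above; the class name is illustrative.

  // Hypothetical MCAsmBackend subclass: relax when the resolved fixup value
  // no longer fits in a signed 8-bit field.
  bool MyAsmBackend::fixupNeedsRelaxation(const llvm::MCFixup &Fixup,
                                          uint64_t Value,
                                          const llvm::MCInstFragment *DF,
                                          const llvm::MCAsmLayout &Layout) const {
    return int64_t(Value) != int64_t(int8_t(Value));
  }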
diff --git a/lib/MC/MCDisassembler/CMakeLists.txt b/lib/MC/MCDisassembler/CMakeLists.txt
index 5cf5f1b..5e2cd83 100644
--- a/lib/MC/MCDisassembler/CMakeLists.txt
+++ b/lib/MC/MCDisassembler/CMakeLists.txt
@@ -2,12 +2,7 @@ add_llvm_library(LLVMMCDisassembler
Disassembler.cpp
EDDisassembler.cpp
EDInst.cpp
+ EDMain.cpp
EDOperand.cpp
EDToken.cpp
)
-
-add_llvm_library_dependencies(LLVMMCDisassembler
- LLVMMC
- LLVMMCParser
- LLVMSupport
- )
diff --git a/lib/MC/MCDisassembler/EDMain.cpp b/lib/MC/MCDisassembler/EDMain.cpp
new file mode 100644
index 0000000..3fd355b
--- /dev/null
+++ b/lib/MC/MCDisassembler/EDMain.cpp
@@ -0,0 +1,280 @@
+//===-- EDMain.cpp - LLVM Enhanced Disassembly C API ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the enhanced disassembler's public C API.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDDisassembler.h"
+#include "EDInst.h"
+#include "EDOperand.h"
+#include "EDToken.h"
+#include "llvm-c/EnhancedDisassembly.h"
+using namespace llvm;
+
+int EDGetDisassembler(EDDisassemblerRef *disassembler,
+ const char *triple,
+ EDAssemblySyntax_t syntax) {
+ EDDisassembler::AssemblySyntax Syntax;
+ switch (syntax) {
+ default: assert(0 && "Unknown assembly syntax!");
+ case kEDAssemblySyntaxX86Intel:
+ Syntax = EDDisassembler::kEDAssemblySyntaxX86Intel;
+ break;
+ case kEDAssemblySyntaxX86ATT:
+ Syntax = EDDisassembler::kEDAssemblySyntaxX86ATT;
+ break;
+ case kEDAssemblySyntaxARMUAL:
+ Syntax = EDDisassembler::kEDAssemblySyntaxARMUAL;
+ break;
+ }
+
+ EDDisassemblerRef ret = EDDisassembler::getDisassembler(triple, Syntax);
+
+ if (!ret)
+ return -1;
+ *disassembler = ret;
+ return 0;
+}
+
+int EDGetRegisterName(const char** regName,
+ EDDisassemblerRef disassembler,
+ unsigned regID) {
+ const char *name = ((EDDisassembler*)disassembler)->nameWithRegisterID(regID);
+ if (!name)
+ return -1;
+ *regName = name;
+ return 0;
+}
+
+int EDRegisterIsStackPointer(EDDisassemblerRef disassembler,
+ unsigned regID) {
+ return ((EDDisassembler*)disassembler)->registerIsStackPointer(regID) ? 1 : 0;
+}
+
+int EDRegisterIsProgramCounter(EDDisassemblerRef disassembler,
+ unsigned regID) {
+ return ((EDDisassembler*)disassembler)->registerIsProgramCounter(regID) ? 1:0;
+}
+
+unsigned int EDCreateInsts(EDInstRef *insts,
+ unsigned int count,
+ EDDisassemblerRef disassembler,
+ ::EDByteReaderCallback byteReader,
+ uint64_t address,
+ void *arg) {
+ unsigned int index;
+
+ for (index = 0; index < count; ++index) {
+ EDInst *inst = ((EDDisassembler*)disassembler)->createInst(byteReader,
+ address, arg);
+
+ if (!inst)
+ return index;
+
+ insts[index] = inst;
+ address += inst->byteSize();
+ }
+
+ return count;
+}
+
+void EDReleaseInst(EDInstRef inst) {
+ delete ((EDInst*)inst);
+}
+
+int EDInstByteSize(EDInstRef inst) {
+ return ((EDInst*)inst)->byteSize();
+}
+
+int EDGetInstString(const char **buf,
+ EDInstRef inst) {
+ return ((EDInst*)inst)->getString(*buf);
+}
+
+int EDInstID(unsigned *instID, EDInstRef inst) {
+ *instID = ((EDInst*)inst)->instID();
+ return 0;
+}
+
+int EDInstIsBranch(EDInstRef inst) {
+ return ((EDInst*)inst)->isBranch();
+}
+
+int EDInstIsMove(EDInstRef inst) {
+ return ((EDInst*)inst)->isMove();
+}
+
+int EDBranchTargetID(EDInstRef inst) {
+ return ((EDInst*)inst)->branchTargetID();
+}
+
+int EDMoveSourceID(EDInstRef inst) {
+ return ((EDInst*)inst)->moveSourceID();
+}
+
+int EDMoveTargetID(EDInstRef inst) {
+ return ((EDInst*)inst)->moveTargetID();
+}
+
+int EDNumTokens(EDInstRef inst) {
+ return ((EDInst*)inst)->numTokens();
+}
+
+int EDGetToken(EDTokenRef *token,
+ EDInstRef inst,
+ int index) {
+ return ((EDInst*)inst)->getToken(*(EDToken**)token, index);
+}
+
+int EDGetTokenString(const char **buf,
+ EDTokenRef token) {
+ return ((EDToken*)token)->getString(*buf);
+}
+
+int EDOperandIndexForToken(EDTokenRef token) {
+ return ((EDToken*)token)->operandID();
+}
+
+int EDTokenIsWhitespace(EDTokenRef token) {
+ return ((EDToken*)token)->type() == EDToken::kTokenWhitespace;
+}
+
+int EDTokenIsPunctuation(EDTokenRef token) {
+ return ((EDToken*)token)->type() == EDToken::kTokenPunctuation;
+}
+
+int EDTokenIsOpcode(EDTokenRef token) {
+ return ((EDToken*)token)->type() == EDToken::kTokenOpcode;
+}
+
+int EDTokenIsLiteral(EDTokenRef token) {
+ return ((EDToken*)token)->type() == EDToken::kTokenLiteral;
+}
+
+int EDTokenIsRegister(EDTokenRef token) {
+ return ((EDToken*)token)->type() == EDToken::kTokenRegister;
+}
+
+int EDTokenIsNegativeLiteral(EDTokenRef token) {
+ if (((EDToken*)token)->type() != EDToken::kTokenLiteral)
+ return -1;
+
+ return ((EDToken*)token)->literalSign();
+}
+
+int EDLiteralTokenAbsoluteValue(uint64_t *value, EDTokenRef token) {
+ if (((EDToken*)token)->type() != EDToken::kTokenLiteral)
+ return -1;
+
+ return ((EDToken*)token)->literalAbsoluteValue(*value);
+}
+
+int EDRegisterTokenValue(unsigned *registerID,
+ EDTokenRef token) {
+ if (((EDToken*)token)->type() != EDToken::kTokenRegister)
+ return -1;
+
+ return ((EDToken*)token)->registerID(*registerID);
+}
+
+int EDNumOperands(EDInstRef inst) {
+ return ((EDInst*)inst)->numOperands();
+}
+
+int EDGetOperand(EDOperandRef *operand,
+ EDInstRef inst,
+ int index) {
+ return ((EDInst*)inst)->getOperand(*(EDOperand**)operand, index);
+}
+
+int EDOperandIsRegister(EDOperandRef operand) {
+ return ((EDOperand*)operand)->isRegister();
+}
+
+int EDOperandIsImmediate(EDOperandRef operand) {
+ return ((EDOperand*)operand)->isImmediate();
+}
+
+int EDOperandIsMemory(EDOperandRef operand) {
+ return ((EDOperand*)operand)->isMemory();
+}
+
+int EDRegisterOperandValue(unsigned *value, EDOperandRef operand) {
+ if (!((EDOperand*)operand)->isRegister())
+ return -1;
+ *value = ((EDOperand*)operand)->regVal();
+ return 0;
+}
+
+int EDImmediateOperandValue(uint64_t *value, EDOperandRef operand) {
+ if (!((EDOperand*)operand)->isImmediate())
+ return -1;
+ *value = ((EDOperand*)operand)->immediateVal();
+ return 0;
+}
+
+int EDEvaluateOperand(uint64_t *result, EDOperandRef operand,
+ ::EDRegisterReaderCallback regReader, void *arg) {
+ return ((EDOperand*)operand)->evaluate(*result, regReader, arg);
+}
+
+#ifdef __BLOCKS__
+
+struct ByteReaderWrapper {
+ EDByteBlock_t byteBlock;
+};
+
+static int readerWrapperCallback(uint8_t *byte,
+ uint64_t address,
+ void *arg) {
+ struct ByteReaderWrapper *wrapper = (struct ByteReaderWrapper *)arg;
+ return wrapper->byteBlock(byte, address);
+}
+
+unsigned int EDBlockCreateInsts(EDInstRef *insts,
+ int count,
+ EDDisassemblerRef disassembler,
+ EDByteBlock_t byteBlock,
+ uint64_t address) {
+ struct ByteReaderWrapper wrapper;
+ wrapper.byteBlock = byteBlock;
+
+ return EDCreateInsts(insts,
+ count,
+ disassembler,
+ readerWrapperCallback,
+ address,
+ (void*)&wrapper);
+}
+
+int EDBlockEvaluateOperand(uint64_t *result, EDOperandRef operand,
+ EDRegisterBlock_t regBlock) {
+ return ((EDOperand*)operand)->evaluate(*result, regBlock);
+}
+
+int EDBlockVisitTokens(EDInstRef inst, ::EDTokenVisitor_t visitor) {
+ return ((EDInst*)inst)->visitTokens((llvm::EDTokenVisitor_t)visitor);
+}
+
+#else
+
+extern "C" unsigned int EDBlockCreateInsts() {
+ return 0;
+}
+
+extern "C" int EDBlockEvaluateOperand() {
+ return -1;
+}
+
+extern "C" int EDBlockVisitTokens() {
+ return -1;
+}
+
+#endif
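For context (not part of the patch): a minimal sketch of driving this C API from a client, treating a zero return as success as the functions above do; the triple, buffer handling, and error handling are illustrative.

  #include "llvm-c/EnhancedDisassembly.h"
  #include <cstdint>
  #include <cstdio>

  // Byte reader: Arg points at the code buffer being disassembled.
  static int readByte(uint8_t *Byte, uint64_t Address, void *Arg) {
    *Byte = static_cast<uint8_t *>(Arg)[Address];
    return 0;
  }

  static void printOneInst(uint8_t *Code) {
    EDDisassemblerRef DC;
    if (EDGetDisassembler(&DC, "x86_64-unknown-linux-gnu",
                          kEDAssemblySyntaxX86ATT) != 0)
      return;
    EDInstRef Inst;
    if (EDCreateInsts(&Inst, 1, DC, readByte, /*address=*/0, Code) != 1)
      return;
    const char *Str;
    if (EDGetInstString(&Str, Inst) == 0)
      std::printf("%s\n", Str);
    EDReleaseInst(Inst);
  }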
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index d2bbd7d..46ab65f 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -19,9 +19,12 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/SourceMgr.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h"
using namespace llvm;
// Given a special op, return the address skip amount (in units of
@@ -423,6 +426,342 @@ void MCDwarfFile::dump() const {
print(dbgs());
}
+// Utility function to write a tuple for .debug_abbrev.
+static void EmitAbbrev(MCStreamer *MCOS, uint64_t Name, uint64_t Form) {
+ MCOS->EmitULEB128IntValue(Name);
+ MCOS->EmitULEB128IntValue(Form);
+}
+
+// When generating dwarf for assembly source files, this emits
+// the data for the .debug_abbrev section, which contains three DIEs.
+static void EmitGenDwarfAbbrev(MCStreamer *MCOS) {
+ MCContext &context = MCOS->getContext();
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfAbbrevSection());
+
+ // DW_TAG_compile_unit DIE abbrev (1).
+ MCOS->EmitULEB128IntValue(1);
+ MCOS->EmitULEB128IntValue(dwarf::DW_TAG_compile_unit);
+ MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1);
+ EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4);
+ EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr);
+ EmitAbbrev(MCOS, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr);
+ EmitAbbrev(MCOS, dwarf::DW_AT_name, dwarf::DW_FORM_string);
+ EmitAbbrev(MCOS, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string);
+ StringRef DwarfDebugFlags = context.getDwarfDebugFlags();
+ if (!DwarfDebugFlags.empty())
+ EmitAbbrev(MCOS, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string);
+ EmitAbbrev(MCOS, dwarf::DW_AT_producer, dwarf::DW_FORM_string);
+ EmitAbbrev(MCOS, dwarf::DW_AT_language, dwarf::DW_FORM_data2);
+ EmitAbbrev(MCOS, 0, 0);
+
+ // DW_TAG_subprogram DIE abbrev (2).
+ MCOS->EmitULEB128IntValue(2);
+ MCOS->EmitULEB128IntValue(dwarf::DW_TAG_subprogram);
+ MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1);
+ EmitAbbrev(MCOS, dwarf::DW_AT_name, dwarf::DW_FORM_string);
+ EmitAbbrev(MCOS, dwarf::DW_AT_decl_file, dwarf::DW_FORM_data4);
+ EmitAbbrev(MCOS, dwarf::DW_AT_decl_line, dwarf::DW_FORM_data4);
+ EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr);
+ EmitAbbrev(MCOS, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr);
+ EmitAbbrev(MCOS, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag);
+ EmitAbbrev(MCOS, 0, 0);
+
+ // DW_TAG_unspecified_parameters DIE abbrev (3).
+ MCOS->EmitULEB128IntValue(3);
+ MCOS->EmitULEB128IntValue(dwarf::DW_TAG_unspecified_parameters);
+ MCOS->EmitIntValue(dwarf::DW_CHILDREN_no, 1);
+ EmitAbbrev(MCOS, 0, 0);
+
+ // Terminate the abbreviations for this compilation unit.
+ MCOS->EmitIntValue(0, 1);
+}
+
+// When generating dwarf for assembly source files, this emits the data for
+// the .debug_aranges section, which contains a header and a table of pairs of
+// PointerSize'ed values for the address and size of section(s) with line table
+// entries (just the default .text in our case) and a terminating pair of zeros.
+static void EmitGenDwarfAranges(MCStreamer *MCOS) {
+ MCContext &context = MCOS->getContext();
+
+ // Create a symbol at the end of the section that we are creating the dwarf
+ // debugging info to use later in here as part of the expression to calculate
+ // the size of the section for the table.
+ MCOS->SwitchSection(context.getGenDwarfSection());
+ MCSymbol *SectionEndSym = context.CreateTempSymbol();
+ MCOS->EmitLabel(SectionEndSym);
+ context.setGenDwarfSectionEndSym(SectionEndSym);
+
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection());
+
+ // This will be the length of the .debug_aranges section, first account for
+ // the size of each item in the header (see below where we emit these items).
+ int Length = 4 + 2 + 4 + 1 + 1;
+
+ // Figure the padding after the header before the table of address and size
+  // pairs whose values are PointerSize'ed.
+ const MCAsmInfo &asmInfo = context.getAsmInfo();
+ int AddrSize = asmInfo.getPointerSize();
+ int Pad = 2 * AddrSize - (Length & (2 * AddrSize - 1));
+ if (Pad == 2 * AddrSize)
+ Pad = 0;
+ Length += Pad;
+
+ // Add the size of the pair of PointerSize'ed values for the address and size
+ // of the one default .text section we have in the table.
+ Length += 2 * AddrSize;
+ // And the pair of terminating zeros.
+ Length += 2 * AddrSize;
+
+
+ // Emit the header for this section.
+ // The 4 byte length not including the 4 byte value for the length.
+ MCOS->EmitIntValue(Length - 4, 4);
+ // The 2 byte version, which is 2.
+ MCOS->EmitIntValue(2, 2);
+ // The 4 byte offset to the compile unit in the .debug_info from the start
+ // of the .debug_info, it is at the start of that section so this is zero.
+ MCOS->EmitIntValue(0, 4);
+ // The 1 byte size of an address.
+ MCOS->EmitIntValue(AddrSize, 1);
+ // The 1 byte size of a segment descriptor, we use a value of zero.
+ MCOS->EmitIntValue(0, 1);
+ // Align the header with the padding if needed, before we put out the table.
+ for(int i = 0; i < Pad; i++)
+ MCOS->EmitIntValue(0, 1);
+
+ // Now emit the table of pairs of PointerSize'ed values for the section(s)
+ // address and size, in our case just the one default .text section.
+ const MCExpr *Addr = MCSymbolRefExpr::Create(
+ context.getGenDwarfSectionStartSym(), MCSymbolRefExpr::VK_None, context);
+ const MCExpr *Size = MakeStartMinusEndExpr(*MCOS,
+ *context.getGenDwarfSectionStartSym(), *SectionEndSym, 0);
+ MCOS->EmitAbsValue(Addr, AddrSize);
+ MCOS->EmitAbsValue(Size, AddrSize);
+
+ // And finally the pair of terminating zeros.
+ MCOS->EmitIntValue(0, AddrSize);
+ MCOS->EmitIntValue(0, AddrSize);
+}
+
+// When generating dwarf for assembly source files, this emits the data for
+// the .debug_info section, which contains three parts: the header, the
+// compile_unit DIE, and a list of subprogram DIEs.
+static void EmitGenDwarfInfo(MCStreamer *MCOS) {
+ MCContext &context = MCOS->getContext();
+
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection());
+
+ // Create a symbol at the start and end of this section used in here for the
+ // expression to calculate the length in the header.
+ MCSymbol *InfoStart = context.CreateTempSymbol();
+ MCOS->EmitLabel(InfoStart);
+ MCSymbol *InfoEnd = context.CreateTempSymbol();
+
+ // First part: the header.
+
+ // The 4 byte total length of the information for this compilation unit, not
+ // including these 4 bytes.
+ const MCExpr *Length = MakeStartMinusEndExpr(*MCOS, *InfoStart, *InfoEnd, 4);
+ MCOS->EmitAbsValue(Length, 4);
+
+ // The 2 byte DWARF version, which is 2.
+ MCOS->EmitIntValue(2, 2);
+
+ // The 4 byte offset to the debug abbrevs from the start of the .debug_abbrev,
+ // it is at the start of that section so this is zero.
+ MCOS->EmitIntValue(0, 4);
+
+ const MCAsmInfo &asmInfo = context.getAsmInfo();
+ int AddrSize = asmInfo.getPointerSize();
+ // The 1 byte size of an address.
+ MCOS->EmitIntValue(AddrSize, 1);
+
+ // Second part: the compile_unit DIE.
+
+ // The DW_TAG_compile_unit DIE abbrev (1).
+ MCOS->EmitULEB128IntValue(1);
+
+ // DW_AT_stmt_list, a 4 byte offset from the start of the .debug_line section,
+ // which is at the start of that section so this is zero.
+ MCOS->EmitIntValue(0, 4);
+
+ // AT_low_pc, the first address of the default .text section.
+ const MCExpr *Start = MCSymbolRefExpr::Create(
+ context.getGenDwarfSectionStartSym(), MCSymbolRefExpr::VK_None, context);
+ MCOS->EmitAbsValue(Start, AddrSize);
+
+ // AT_high_pc, the last address of the default .text section.
+ const MCExpr *End = MCSymbolRefExpr::Create(
+ context.getGenDwarfSectionEndSym(), MCSymbolRefExpr::VK_None, context);
+ MCOS->EmitAbsValue(End, AddrSize);
+
+ // AT_name, the name of the source file. Reconstruct from the first directory
+ // and file table entries.
+ const std::vector<StringRef> &MCDwarfDirs =
+ context.getMCDwarfDirs();
+ if (MCDwarfDirs.size() > 0) {
+ MCOS->EmitBytes(MCDwarfDirs[0], 0);
+ MCOS->EmitBytes("/", 0);
+ }
+ const std::vector<MCDwarfFile *> &MCDwarfFiles =
+ MCOS->getContext().getMCDwarfFiles();
+ MCOS->EmitBytes(MCDwarfFiles[1]->getName(), 0);
+ MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
+
+ // AT_comp_dir, the working directory the assembly was done in.
+ llvm::sys::Path CWD = llvm::sys::Path::GetCurrentDirectory();
+ MCOS->EmitBytes(StringRef(CWD.c_str()), 0);
+ MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
+
+ // AT_APPLE_flags, the command line arguments of the assembler tool.
+ StringRef DwarfDebugFlags = context.getDwarfDebugFlags();
+ if (!DwarfDebugFlags.empty()){
+ MCOS->EmitBytes(DwarfDebugFlags, 0);
+ MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
+ }
+
+ // AT_producer, the version of the assembler tool.
+ MCOS->EmitBytes(StringRef("llvm-mc (based on LLVM "), 0);
+ MCOS->EmitBytes(StringRef(PACKAGE_VERSION), 0);
+ MCOS->EmitBytes(StringRef(")"), 0);
+ MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
+
+ // AT_language, a 2 byte value. We use DW_LANG_Mips_Assembler as the dwarf2
+ // draft has no standard code for assembler.
+ MCOS->EmitIntValue(dwarf::DW_LANG_Mips_Assembler, 2);
+
+ // Third part: the list of subprogram DIEs.
+
+ // Loop on saved info for dwarf subprograms and create the DIEs for them.
+ const std::vector<const MCGenDwarfSubprogramEntry *> &Entries =
+ MCOS->getContext().getMCGenDwarfSubprogramEntries();
+ for (std::vector<const MCGenDwarfSubprogramEntry *>::const_iterator it =
+ Entries.begin(), ie = Entries.end(); it != ie;
+ ++it) {
+ const MCGenDwarfSubprogramEntry *Entry = *it;
+
+ // The DW_TAG_subprogram DIE abbrev (2).
+ MCOS->EmitULEB128IntValue(2);
+
+ // AT_name, the name of the label without any leading underbar.
+ MCOS->EmitBytes(Entry->getName(), 0);
+ MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
+
+ // AT_decl_file, index into the file table.
+ MCOS->EmitIntValue(Entry->getFileNumber(), 4);
+
+ // AT_decl_line, source line number.
+ MCOS->EmitIntValue(Entry->getLineNumber(), 4);
+
+ // AT_low_pc, start address of the label.
+ const MCExpr *AT_low_pc = MCSymbolRefExpr::Create(Entry->getLabel(),
+ MCSymbolRefExpr::VK_None, context);
+ MCOS->EmitAbsValue(AT_low_pc, AddrSize);
+
+ // AT_high_pc, the end address, which is the next label or the end of the
+ // section.
+ std::vector<const MCGenDwarfSubprogramEntry *>::const_iterator next = it+1;
+ if (next != Entries.end()){
+ const MCGenDwarfSubprogramEntry *NextEntry = *next;
+ const MCExpr *AT_high_pc = MCSymbolRefExpr::Create(NextEntry->getLabel(),
+ MCSymbolRefExpr::VK_None, context);
+ MCOS->EmitAbsValue(AT_high_pc, AddrSize);
+ } else {
+ MCOS->EmitAbsValue(End, AddrSize);
+ }
+
+ // DW_AT_prototyped, a one byte flag value of 0 saying we have no prototype.
+ MCOS->EmitIntValue(0, 1);
+
+ // The DW_TAG_unspecified_parameters DIE abbrev (3).
+ MCOS->EmitULEB128IntValue(3);
+
+ // Add the NULL DIE terminating the children of the DW_TAG_subprogram DIE
+ // (here just the DW_TAG_unspecified_parameters DIE).
+ MCOS->EmitIntValue(0, 1);
+ }
+ // Deallocate the MCGenDwarfSubprogramEntry objects that saved away the info
+ // for the dwarf subprograms.
+ for (std::vector<const MCGenDwarfSubprogramEntry *>::const_iterator it =
+ Entries.begin(), ie = Entries.end(); it != ie;
+ ++it) {
+ const MCGenDwarfSubprogramEntry *Entry = *it;
+ delete Entry;
+ }
+
+ // Add the NULL DIE terminating the Compile Unit DIE's children.
+ MCOS->EmitIntValue(0, 1);
+
+ // Now set the value of the symbol at the end of the info section.
+ MCOS->EmitLabel(InfoEnd);
+}
+
+//
+// When generating dwarf for assembly source files this emits the Dwarf
+// sections.
+//
+void MCGenDwarfInfo::Emit(MCStreamer *MCOS) {
+ // Create the dwarf sections in this order (.debug_line already created).
+ MCContext &context = MCOS->getContext();
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection());
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfAbbrevSection());
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection());
+
+ // If there are no line table entries then do not emit any section contents.
+ if (context.getMCLineSections().empty())
+ return;
+
+ // Output the data for .debug_aranges section.
+ EmitGenDwarfAranges(MCOS);
+
+ // Output the data for .debug_abbrev section.
+ EmitGenDwarfAbbrev(MCOS);
+
+ // Output the data for .debug_info section.
+ EmitGenDwarfInfo(MCOS);
+}
+
+//
+// When generating dwarf for assembly source files this is called when a
+// symbol for a label is created. If this symbol is not a temporary and is in
+// the section that dwarf is being generated for, save the needed info to
+// create a dwarf subprogram.
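+//
+// For example, a non-temporary label such as "_main:" in the section dwarf is
+// being generated for records an entry named "main" (the leading underbar is
+// stripped) along with its file number, its line number and a temporary label
+// at its address. (The label name here is only illustrative.)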
+//
+void MCGenDwarfSubprogramEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS,
+ SourceMgr &SrcMgr, SMLoc &Loc) {
+ // We won't create dwarf subprograms for temporary symbols or for symbols
+ // that are not in the default text section.
+ if (Symbol->isTemporary())
+ return;
+ MCContext &context = MCOS->getContext();
+ if (context.getGenDwarfSection() != MCOS->getCurrentSection())
+ return;
+
+ // The dwarf subprogram's name omits the symbol name's leading underbar,
+ // if any.
+ StringRef Name = Symbol->getName();
+ if (Name.startswith("_"))
+ Name = Name.substr(1, Name.size()-1);
+
+ // Get the dwarf file number to be used for the dwarf subprogram.
+ unsigned FileNumber = context.getGenDwarfFileNumber();
+
+ // Finding the line number is the expensive part, which is why it is not
+ // simply passed in: for some symbols we won't create a dwarf subprogram.
+ int CurBuffer = SrcMgr.FindBufferContainingLoc(Loc);
+ unsigned LineNumber = SrcMgr.FindLineNumber(Loc, CurBuffer);
+
+ // We create a temporary symbol to use for the AT_high_pc and AT_low_pc
+ // values so that they don't carry things like an ARM thumb bit from the
+ // original symbol, and therefore won't get a low bit set after relocation.
+ MCSymbol *Label = context.CreateTempSymbol();
+ MCOS->EmitLabel(Label);
+
+ // Create an entry for the info and add it to the other entries.
+ MCGenDwarfSubprogramEntry *Entry =
+ new MCGenDwarfSubprogramEntry(Name, FileNumber, LineNumber, Label);
+ MCOS->getContext().addMCGenDwarfSubprogramEntry(Entry);
+}
+
static int getDataAlignmentFactor(MCStreamer &streamer) {
MCContext &context = streamer.getContext();
const MCAsmInfo &asmInfo = context.getAsmInfo();
@@ -1009,10 +1348,7 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer,
ArrayRef<MCDwarfFrameInfo> FrameArray = Streamer.getFrameInfos();
// Emit the compact unwind info if available.
- // FIXME: This emits both the compact unwind and the old CIE/FDE
- // information. Only one of those is needed.
- // FIXME: Disable. This seems to still be causing failures.
- if (false && IsEH && MOFI->getCompactUnwindSection())
+ if (IsEH && MOFI->getCompactUnwindSection())
for (unsigned i = 0, n = Streamer.getNumFrameInfos(); i < n; ++i) {
const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i);
if (Frame.CompactUnwindEncoding)
diff --git a/lib/MC/MCELF.cpp b/lib/MC/MCELF.cpp
index dad2e7b..f9f98e0 100644
--- a/lib/MC/MCELF.cpp
+++ b/lib/MC/MCELF.cpp
@@ -37,7 +37,7 @@ void MCELF::SetType(MCSymbolData &SD, unsigned Type) {
assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT ||
Type == ELF::STT_FUNC || Type == ELF::STT_SECTION ||
Type == ELF::STT_FILE || Type == ELF::STT_COMMON ||
- Type == ELF::STT_TLS);
+ Type == ELF::STT_TLS || Type == ELF::STT_GNU_IFUNC);
uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STT_Shift);
SD.setFlags(OtherFlags | (Type << ELF_STT_Shift));
@@ -48,7 +48,7 @@ unsigned MCELF::GetType(const MCSymbolData &SD) {
assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT ||
Type == ELF::STT_FUNC || Type == ELF::STT_SECTION ||
Type == ELF::STT_FILE || Type == ELF::STT_COMMON ||
- Type == ELF::STT_TLS);
+ Type == ELF::STT_TLS || Type == ELF::STT_GNU_IFUNC);
return Type;
}
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index 0ea3c64..dcc4666 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -130,7 +130,6 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
case MCSA_WeakDefinition:
case MCSA_WeakDefAutoPrivate:
case MCSA_Invalid:
- case MCSA_ELF_TypeIndFunction:
case MCSA_IndirectSymbol:
assert(0 && "Invalid symbol attribute for ELF!");
break;
@@ -162,6 +161,10 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
MCELF::SetType(SD, ELF::STT_FUNC);
break;
+ case MCSA_ELF_TypeIndFunction:
+ MCELF::SetType(SD, ELF::STT_GNU_IFUNC);
+ break;
+
case MCSA_ELF_TypeObject:
MCELF::SetType(SD, ELF::STT_OBJECT);
break;
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index aa35815..50ab1f8 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -156,8 +156,6 @@ void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
}
void MCMachOStreamer::EmitThumbFunc(MCSymbol *Symbol) {
- // FIXME: Flag the function ISA as thumb with DW_AT_APPLE_isa.
-
// Remember that the function is a thumb function. Fixup and relocation
// values will need adjusted.
getAssembler().setIsThumbFunc(Symbol);
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index 7d23541..32ba924 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -31,6 +31,8 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
if (T.isMacOSX() && T.isMacOSXVersionLT(10, 5))
CommDirectiveSupportsAlignment = false;
+ StructorOutputOrder = Structors::PriorityOrder;
+
TextSection // .text
= Ctx->getMachOSection("__TEXT", "__text",
MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
@@ -258,6 +260,8 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
}
}
+ StructorOutputOrder = Structors::ReversePriorityOrder;
+
// ELF
BSSSection =
Ctx->getELFSection(".bss", ELF::SHT_NOBITS,
@@ -385,6 +389,8 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
// COFF
+ StructorOutputOrder = Structors::ReversePriorityOrder;
+
TextSection =
Ctx->getCOFFSection(".text",
COFF::IMAGE_SCN_CNT_CODE |
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index 90c957f..663d0ca 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -260,5 +260,9 @@ void MCObjectStreamer::Finish() {
if (getContext().hasDwarfFiles())
MCDwarfFileTable::Emit(this);
+ // If we are generating dwarf for assembly source files, dump out the sections.
+ if (getContext().getGenDwarfForAssembly())
+ MCGenDwarfInfo::Emit(this);
+
getAssembler().Finish();
}
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 7883893..aac020d 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -181,6 +181,9 @@ private:
/// EnterIncludeFile - Enter the specified file. This returns true on failure.
bool EnterIncludeFile(const std::string &Filename);
+ /// ProcessIncbinFile - Process the specified file for the .incbin directive.
+ /// This returns true on failure.
+ bool ProcessIncbinFile(const std::string &Filename);
/// \brief Reset the current lexer position to that given by \arg Loc. The
/// current token is not set; clients should ensure Lex() is called
@@ -227,6 +230,7 @@ private:
bool ParseDirectiveAbort(); // ".abort"
bool ParseDirectiveInclude(); // ".include"
+ bool ParseDirectiveIncbin(); // ".incbin"
bool ParseDirectiveIf(SMLoc DirectiveLoc); // ".if"
// ".ifdef" or ".ifndef", depending on expect_defined
@@ -429,6 +433,21 @@ bool AsmParser::EnterIncludeFile(const std::string &Filename) {
return false;
}
+/// Process the specified .incbin file by searching for it in the include paths
+/// then just emitting the byte contents of the file to the streamer. This
+/// returns true on failure.
+bool AsmParser::ProcessIncbinFile(const std::string &Filename) {
+ std::string IncludedFile;
+ int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
+ if (NewBuf == -1)
+ return true;
+
+ // Pick up the bytes from the file and emit them.
+ getStreamer().EmitBytes(SrcMgr.getMemoryBuffer(NewBuf)->getBuffer(),
+ DEFAULT_ADDRSPACE);
+ return false;
+}
+
void AsmParser::JumpToLoc(SMLoc Loc) {
CurBuffer = SrcMgr.FindBufferContainingLoc(Loc);
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer), Loc.getPointer());
@@ -468,6 +487,9 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// section and generate a .file directive.
if (getContext().getGenDwarfForAssembly()) {
getContext().setGenDwarfSection(getStreamer().getCurrentSection());
+ MCSymbol *SectionStartSym = getContext().CreateTempSymbol();
+ getStreamer().EmitLabel(SectionStartSym);
+ getContext().setGenDwarfSectionStartSym(SectionStartSym);
getStreamer().EmitDwarfFileDirective(getContext().nextGenDwarfFileNumber(),
StringRef(), SrcMgr.getMemoryBuffer(CurBuffer)->getBufferIdentifier());
}
@@ -1047,6 +1069,12 @@ bool AsmParser::ParseStatement() {
// Emit the label.
Out.EmitLabel(Sym);
+ // If we are generating dwarf for assembly source files then gather the
+ // info to make a dwarf subprogram entry for this label if needed.
+ if (getContext().getGenDwarfForAssembly())
+ MCGenDwarfSubprogramEntry::Make(Sym, &getStreamer(), getSourceManager(),
+ IDLoc);
+
// Consume any end of statement token, if present, to avoid spurious
// AddBlankLine calls().
if (Lexer.is(AsmToken::EndOfStatement)) {
@@ -1174,6 +1202,8 @@ bool AsmParser::ParseStatement() {
return ParseDirectiveAbort();
if (IDVal == ".include")
return ParseDirectiveInclude();
+ if (IDVal == ".incbin")
+ return ParseDirectiveIncbin();
if (IDVal == ".code16")
return TokError(Twine(IDVal) + " not supported yet");
@@ -2197,6 +2227,31 @@ bool AsmParser::ParseDirectiveInclude() {
return false;
}
+/// ParseDirectiveIncbin
+/// ::= .incbin "filename"
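+///
+/// For example (the file name here is illustrative):
+///   .incbin "blob.bin"
+/// emits the raw bytes of blob.bin, located via the include search paths, at
+/// the current position in the output.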
+bool AsmParser::ParseDirectiveIncbin() {
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("expected string in '.incbin' directive");
+
+ std::string Filename = getTok().getString();
+ SMLoc IncbinLoc = getLexer().getLoc();
+ Lex();
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.incbin' directive");
+
+ // Strip the quotes.
+ Filename = Filename.substr(1, Filename.size()-2);
+
+ // Attempt to process the incbin file.
+ if (ProcessIncbinFile(Filename)) {
+ Error(IncbinLoc, "Could not find incbin file '" + Filename + "'");
+ return true;
+ }
+
+ return false;
+}
+
/// ParseDirectiveIf
/// ::= .if expression
bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) {
diff --git a/lib/MC/MCParser/CMakeLists.txt b/lib/MC/MCParser/CMakeLists.txt
index 299d281..222f237 100644
--- a/lib/MC/MCParser/CMakeLists.txt
+++ b/lib/MC/MCParser/CMakeLists.txt
@@ -9,8 +9,3 @@ add_llvm_library(LLVMMCParser
MCAsmParserExtension.cpp
MCTargetAsmParser.cpp
)
-
-add_llvm_library_dependencies(LLVMMCParser
- LLVMMC
- LLVMSupport
- )
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index d891126..ffc400b 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -476,6 +476,7 @@ bool ELFAsmParser::ParseDirectiveType(StringRef, SMLoc) {
.Case("common", MCSA_ELF_TypeCommon)
.Case("notype", MCSA_ELF_TypeNoType)
.Case("gnu_unique_object", MCSA_ELF_TypeGnuUniqueObject)
+ .Case("gnu_indirect_function", MCSA_ELF_TypeIndFunction)
.Default(MCSA_Invalid);
if (Attr == MCSA_Invalid)
diff --git a/lib/MC/MCParser/LLVMBuild.txt b/lib/MC/MCParser/LLVMBuild.txt
index 83146a9..bcb0feb 100644
--- a/lib/MC/MCParser/LLVMBuild.txt
+++ b/lib/MC/MCParser/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = MCParser
parent = MC
required_libraries = MC Support
-
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index a9219ad..e016f09 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -584,9 +584,14 @@ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
// requires the compiler to use .set to absolutize the differences between
// symbols which the compiler knows to be assembly time constants, so we
// don't need to worry about considering symbol differences fully resolved.
+ //
+ // If the file isn't using sub-sections-via-symbols, we can make the
+ // same assumptions about any symbol that we normally make about
+ // assembler locals.
if (!Asm.getBackend().hasReliableSymbolDifference()) {
- if (!SA.isTemporary() || !SA.isInSection() || &SecA != &SecB)
+ if ((!SA.isTemporary() && Asm.getSubsectionsViaSymbols()) ||
+ !SA.isInSection() || &SecA != &SecB)
return false;
return true;
}
@@ -628,7 +633,7 @@ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
}
void MachObjectWriter::WriteObject(MCAssembler &Asm,
- const MCAsmLayout &Layout) {
+ const MCAsmLayout &Layout) {
unsigned NumSections = Asm.size();
// The section data starts after the header, the segment load command (and
@@ -731,7 +736,7 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
// Write the actual section data.
for (MCAssembler::const_iterator it = Asm.begin(),
ie = Asm.end(); it != ie; ++it) {
- Asm.WriteSectionData(it, Layout);
+ Asm.writeSectionData(it, Layout);
uint64_t Pad = getPaddingSize(it, Layout);
for (unsigned int i = 0; i < Pad; ++i)
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index 4d3b59c..4052374 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -850,7 +850,7 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
assert(OS.tell() == (*i)->Header.PointerToRawData &&
"Section::PointerToRawData is insane!");
- Asm.WriteSectionData(j, Layout);
+ Asm.writeSectionData(j, Layout);
}
if ((*i)->Relocations.size() > 0) {
diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt
index 86eb51a..c20fc0c 100644
--- a/lib/Object/CMakeLists.txt
+++ b/lib/Object/CMakeLists.txt
@@ -9,8 +9,3 @@ add_llvm_library(LLVMObject
Object.cpp
ObjectFile.cpp
)
-
-add_llvm_library_dependencies(LLVMObject
- LLVMCore
- LLVMSupport
- )
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index c6ce562..bdf5431 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -101,7 +101,7 @@ error_code COFFObjectFile::getSymbolNext(DataRefImpl Symb,
return getSymbolName(symb, Result);
}
-error_code COFFObjectFile::getSymbolOffset(DataRefImpl Symb,
+error_code COFFObjectFile::getSymbolFileOffset(DataRefImpl Symb,
uint64_t &Result) const {
const coff_symbol *symb = toSymb(Symb);
const coff_section *Section = NULL;
@@ -113,7 +113,7 @@ error_code COFFObjectFile::getSymbolOffset(DataRefImpl Symb,
if (Type == 'U' || Type == 'w')
Result = UnknownAddressOrSize;
else if (Section)
- Result = Section->VirtualAddress + symb->Value;
+ Result = Section->PointerToRawData + symb->Value;
else
Result = symb->Value;
return object_error::success;
@@ -131,11 +131,9 @@ error_code COFFObjectFile::getSymbolAddress(DataRefImpl Symb,
if (Type == 'U' || Type == 'w')
Result = UnknownAddressOrSize;
else if (Section)
- Result = reinterpret_cast<uintptr_t>(base() +
- Section->PointerToRawData +
- symb->Value);
+ Result = Section->VirtualAddress + symb->Value;
else
- Result = reinterpret_cast<uintptr_t>(base() + symb->Value);
+ Result = symb->Value;
return object_error::success;
}
@@ -283,7 +281,7 @@ error_code COFFObjectFile::getSymbolSection(DataRefImpl Symb,
if (symb->SectionNumber <= COFF::IMAGE_SYM_UNDEFINED)
Result = end_sections();
else {
- const coff_section *sec;
+ const coff_section *sec = 0;
if (error_code ec = getSection(symb->SectionNumber, sec)) return ec;
DataRefImpl Sec;
std::memset(&Sec, 0, sizeof(Sec));
@@ -389,7 +387,7 @@ error_code COFFObjectFile::sectionContainsSymbol(DataRefImpl Sec,
bool &Result) const {
const coff_section *sec = toSec(Sec);
const coff_symbol *symb = toSymb(Symb);
- const coff_section *symb_sec;
+ const coff_section *symb_sec = 0;
if (error_code ec = getSection(symb->SectionNumber, symb_sec)) return ec;
if (symb_sec == sec)
Result = true;
@@ -624,6 +622,11 @@ error_code COFFObjectFile::getRelocationAddress(DataRefImpl Rel,
Res = toRel(Rel)->VirtualAddress;
return object_error::success;
}
+error_code COFFObjectFile::getRelocationOffset(DataRefImpl Rel,
+ uint64_t &Res) const {
+ Res = toRel(Rel)->VirtualAddress;
+ return object_error::success;
+}
error_code COFFObjectFile::getRelocationSymbol(DataRefImpl Rel,
SymbolRef &Res) const {
const coff_relocation* R = toRel(Rel);
diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp
index d1a43e7..a6c4c25 100644
--- a/lib/Object/ELFObjectFile.cpp
+++ b/lib/Object/ELFObjectFile.cpp
@@ -325,7 +325,7 @@ class ELFObjectFile : public ObjectFile {
protected:
virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const;
virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const;
- virtual error_code getSymbolOffset(DataRefImpl Symb, uint64_t &Res) const;
+ virtual error_code getSymbolFileOffset(DataRefImpl Symb, uint64_t &Res) const;
virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const;
virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const;
virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const;
@@ -355,6 +355,8 @@ protected:
RelocationRef &Res) const;
virtual error_code getRelocationAddress(DataRefImpl Rel,
uint64_t &Res) const;
+ virtual error_code getRelocationOffset(DataRefImpl Rel,
+ uint64_t &Res) const;
virtual error_code getRelocationSymbol(DataRefImpl Rel,
SymbolRef &Res) const;
virtual error_code getRelocationType(DataRefImpl Rel,
@@ -462,7 +464,7 @@ ELFObjectFile<target_endianness, is64Bits>
template<support::endianness target_endianness, bool is64Bits>
error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolOffset(DataRefImpl Symb,
+ ::getSymbolFileOffset(DataRefImpl Symb,
uint64_t &Result) const {
validateSymbol(Symb);
const Elf_Sym *symb = getSymbol(Symb);
@@ -486,7 +488,8 @@ error_code ELFObjectFile<target_endianness, is64Bits>
case ELF::STT_FUNC:
case ELF::STT_OBJECT:
case ELF::STT_NOTYPE:
- Result = symb->st_value;
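+ // Translate the symbol's virtual address into a file offset using the
+ // containing section's sh_offset and sh_addr.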
+ Result = symb->st_value +
+ (Section ? Section->sh_offset - Section->sh_addr : 0);
return object_error::success;
default:
Result = UnknownAddressOrSize;
@@ -502,28 +505,25 @@ error_code ELFObjectFile<target_endianness, is64Bits>
const Elf_Sym *symb = getSymbol(Symb);
const Elf_Shdr *Section;
switch (getSymbolTableIndex(symb)) {
- case ELF::SHN_COMMON: // Fall through.
+ case ELF::SHN_COMMON:
// Undefined symbols have no address yet.
case ELF::SHN_UNDEF:
Result = UnknownAddressOrSize;
return object_error::success;
case ELF::SHN_ABS:
- Result = reinterpret_cast<uintptr_t>(base()+symb->st_value);
+ Result = symb->st_value;
return object_error::success;
default: Section = getSection(symb);
}
- const uint8_t* addr = base();
- if (Section)
- addr += Section->sh_offset;
+
switch (symb->getType()) {
case ELF::STT_SECTION:
- Result = reinterpret_cast<uintptr_t>(addr);
+ Result = Section ? Section->sh_addr : UnknownAddressOrSize;
return object_error::success;
- case ELF::STT_FUNC: // Fall through.
- case ELF::STT_OBJECT: // Fall through.
+ case ELF::STT_FUNC:
+ case ELF::STT_OBJECT:
case ELF::STT_NOTYPE:
- addr += symb->st_value;
- Result = reinterpret_cast<uintptr_t>(addr);
+ Result = symb->st_value;
return object_error::success;
default:
Result = UnknownAddressOrSize;
@@ -922,6 +922,29 @@ error_code ELFObjectFile<target_endianness, is64Bits>
template<support::endianness target_endianness, bool is64Bits>
error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getRelocationOffset(DataRefImpl Rel,
+ uint64_t &Result) const {
+ uint64_t offset;
+ const Elf_Shdr *sec = getSection(Rel.w.b);
+ switch (sec->sh_type) {
+ default :
+ report_fatal_error("Invalid section type in Rel!");
+ case ELF::SHT_REL : {
+ offset = getRel(Rel)->r_offset;
+ break;
+ }
+ case ELF::SHT_RELA : {
+ offset = getRela(Rel)->r_offset;
+ break;
+ }
+ }
+
+ Result = offset - sec->sh_addr;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
::getRelocationType(DataRefImpl Rel,
uint64_t &Result) const {
const Elf_Shdr *sec = getSection(Rel.w.b);
diff --git a/lib/Object/LLVMBuild.txt b/lib/Object/LLVMBuild.txt
index 20fbb85..0041acd 100644
--- a/lib/Object/LLVMBuild.txt
+++ b/lib/Object/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = Object
parent = Libraries
required_libraries = Core Support
-
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index 65ce5f8..4fa621b 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -125,23 +125,27 @@ error_code MachOObjectFile::getSymbolName(DataRefImpl DRI,
return object_error::success;
}
-error_code MachOObjectFile::getSymbolOffset(DataRefImpl DRI,
- uint64_t &Result) const {
- uint64_t SectionOffset;
- uint8_t SectionIndex;
+error_code MachOObjectFile::getSymbolFileOffset(DataRefImpl DRI,
+ uint64_t &Result) const {
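+ // The symbol table entry holds the symbol's address; if the symbol is in a
+ // section, convert that address to a file offset using the section's file
+ // offset and load address.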
if (MachOObj->is64Bit()) {
InMemoryStruct<macho::Symbol64TableEntry> Entry;
getSymbol64TableEntry(DRI, Entry);
Result = Entry->Value;
- SectionIndex = Entry->SectionIndex;
+ if (Entry->SectionIndex) {
+ InMemoryStruct<macho::Section64> Section;
+ getSection64(Sections[Entry->SectionIndex-1], Section);
+ Result += Section->Offset - Section->Address;
+ }
} else {
InMemoryStruct<macho::SymbolTableEntry> Entry;
getSymbolTableEntry(DRI, Entry);
Result = Entry->Value;
- SectionIndex = Entry->SectionIndex;
+ if (Entry->SectionIndex) {
+ InMemoryStruct<macho::Section> Section;
+ getSection(Sections[Entry->SectionIndex-1], Section);
+ Result += Section->Offset - Section->Address;
+ }
}
- getSectionAddress(Sections[SectionIndex-1], SectionOffset);
- Result -= SectionOffset;
return object_error::success;
}
@@ -162,7 +166,64 @@ error_code MachOObjectFile::getSymbolAddress(DataRefImpl DRI,
error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI,
uint64_t &Result) const {
- Result = UnknownAddressOrSize;
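+ // A symbol's size is computed as the distance from its value to the value
+ // of the next symbol in the same section, or to the end of the section if
+ // there is no such symbol.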
+ uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
+ uint64_t BeginOffset;
+ uint64_t EndOffset = 0;
+ uint8_t SectionIndex;
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(DRI, Entry);
+ BeginOffset = Entry->Value;
+ SectionIndex = Entry->SectionIndex;
+ if (!SectionIndex) {
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+ }
+ // Unfortunately symbols are unsorted, so we need to touch all symbols
+ // from this load command.
+ DRI.d.b = 0;
+ uint32_t Command = DRI.d.a;
+ while (Command == DRI.d.a) {
+ moveToNextSymbol(DRI);
+ if (DRI.d.a < LoadCommandCount) {
+ getSymbol64TableEntry(DRI, Entry);
+ if (Entry->SectionIndex == SectionIndex && Entry->Value > BeginOffset)
+ if (!EndOffset || Entry->Value < EndOffset)
+ EndOffset = Entry->Value;
+ }
+ DRI.d.b++;
+ }
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(DRI, Entry);
+ BeginOffset = Entry->Value;
+ SectionIndex = Entry->SectionIndex;
+ if (!SectionIndex) {
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+ }
+ // Unfortunately symbols are unsorted, so we need to touch all symbols
+ // from this load command.
+ DRI.d.b = 0;
+ uint32_t Command = DRI.d.a;
+ while (Command == DRI.d.a) {
+ moveToNextSymbol(DRI);
+ if (DRI.d.a < LoadCommandCount) {
+ getSymbolTableEntry(DRI, Entry);
+ if (Entry->SectionIndex == SectionIndex && Entry->Value > BeginOffset)
+ if (!EndOffset || Entry->Value < EndOffset)
+ EndOffset = Entry->Value;
+ }
+ DRI.d.b++;
+ }
+ }
+ if (!EndOffset) {
+ uint64_t Size;
+ getSectionSize(Sections[SectionIndex-1], Size);
+ getSectionAddress(Sections[SectionIndex-1], EndOffset);
+ EndOffset += Size;
+ }
+ Result = EndOffset - BeginOffset;
return object_error::success;
}
@@ -275,7 +336,7 @@ error_code MachOObjectFile::getSymbolSection(DataRefImpl Symb,
if (index == 0)
Res = end_sections();
else
- Res = section_iterator(SectionRef(Sections[index], this));
+ Res = section_iterator(SectionRef(Sections[index-1], this));
return object_error::success;
}
@@ -614,7 +675,7 @@ error_code MachOObjectFile::getRelocationAddress(DataRefImpl Rel,
bool isScattered = (Arch != Triple::x86_64) &&
(RE->Word0 & macho::RF_Scattered);
uint64_t RelAddr = 0;
- if (isScattered)
+ if (isScattered)
RelAddr = RE->Word0 & 0xFFFFFF;
else
RelAddr = RE->Word0;
@@ -622,6 +683,20 @@ error_code MachOObjectFile::getRelocationAddress(DataRefImpl Rel,
Res = reinterpret_cast<uintptr_t>(sectAddress + RelAddr);
return object_error::success;
}
+error_code MachOObjectFile::getRelocationOffset(DataRefImpl Rel,
+ uint64_t &Res) const {
+ InMemoryStruct<macho::RelocationEntry> RE;
+ getRelocation(Rel, RE);
+
+ unsigned Arch = getArch();
+ bool isScattered = (Arch != Triple::x86_64) &&
+ (RE->Word0 & macho::RF_Scattered);
+ if (isScattered)
+ Res = RE->Word0 & 0xFFFFFF;
+ else
+ Res = RE->Word0;
+ return object_error::success;
+}
error_code MachOObjectFile::getRelocationSymbol(DataRefImpl Rel,
SymbolRef &Res) const {
InMemoryStruct<macho::RelocationEntry> RE;
diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp
index 719bf88..f061ea7 100644
--- a/lib/Object/Object.cpp
+++ b/lib/Object/Object.cpp
@@ -150,9 +150,9 @@ uint64_t LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI) {
return ret;
}
-uint64_t LLVMGetSymbolOffset(LLVMSymbolIteratorRef SI) {
+uint64_t LLVMGetSymbolFileOffset(LLVMSymbolIteratorRef SI) {
uint64_t ret;
- if (error_code ec = (*unwrap(SI))->getOffset(ret))
+ if (error_code ec = (*unwrap(SI))->getFileOffset(ret))
report_fatal_error(ec.message());
return ret;
}
@@ -172,6 +172,13 @@ uint64_t LLVMGetRelocationAddress(LLVMRelocationIteratorRef RI) {
return ret;
}
+uint64_t LLVMGetRelocationOffset(LLVMRelocationIteratorRef RI) {
+ uint64_t ret;
+ if (error_code ec = (*unwrap(RI))->getOffset(ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
LLVMSymbolIteratorRef LLVMGetRelocationSymbol(LLVMRelocationIteratorRef RI) {
SymbolRef ret;
if (error_code ec = (*unwrap(RI))->getSymbol(ret))
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index f238894..70e7afd 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -1854,20 +1854,33 @@ APFloat::convert(const fltSemantics &toSemantics,
lostFraction lostFraction;
unsigned int newPartCount, oldPartCount;
opStatus fs;
+ int shift;
+ const fltSemantics &fromSemantics = *semantics;
- assertArithmeticOK(*semantics);
+ assertArithmeticOK(fromSemantics);
assertArithmeticOK(toSemantics);
lostFraction = lfExactlyZero;
newPartCount = partCountForBits(toSemantics.precision + 1);
oldPartCount = partCount();
+ shift = toSemantics.precision - fromSemantics.precision;
+
+ bool X86SpecialNan = false;
+ if (&fromSemantics == &APFloat::x87DoubleExtended &&
+ &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
+ (!(*significandParts() & 0x8000000000000000ULL) ||
+ !(*significandParts() & 0x4000000000000000ULL))) {
+ // x86 has some unusual NaNs which cannot be represented in any other
+ // format; note them here.
+ X86SpecialNan = true;
+ }
+
+ // If this is a truncation, perform the shift before we narrow the storage.
+ if (shift < 0 && (category==fcNormal || category==fcNaN))
+ lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
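+ // For example, converting from IEEE double (53 bit precision) to IEEE single
+ // (24 bit precision) gives shift = 24 - 53 = -29, so the significand is
+ // shifted right by 29 bits here and the lost fraction is kept for rounding.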
- /* Handle storage complications. If our new form is wider,
- re-allocate our bit pattern into wider storage. If it is
- narrower, we ignore the excess parts, but if narrowing to a
- single part we need to free the old storage.
- Be careful not to reference significandParts for zeroes
- and infinities, since it aborts. */
+ // Fix the storage so it can hold the new value.
if (newPartCount > oldPartCount) {
+ // The new type requires more storage; make it available.
integerPart *newParts;
newParts = new integerPart[newPartCount];
APInt::tcSet(newParts, 0, newPartCount);
@@ -1875,61 +1888,36 @@ APFloat::convert(const fltSemantics &toSemantics,
APInt::tcAssign(newParts, significandParts(), oldPartCount);
freeSignificand();
significand.parts = newParts;
- } else if (newPartCount < oldPartCount) {
- /* Capture any lost fraction through truncation of parts so we get
- correct rounding whilst normalizing. */
- if (category==fcNormal)
- lostFraction = lostFractionThroughTruncation
- (significandParts(), oldPartCount, toSemantics.precision);
- if (newPartCount == 1) {
- integerPart newPart = 0;
- if (category==fcNormal || category==fcNaN)
- newPart = significandParts()[0];
- freeSignificand();
- significand.part = newPart;
- }
+ } else if (newPartCount == 1 && oldPartCount != 1) {
+ // Switch to built-in storage for a single part.
+ integerPart newPart = 0;
+ if (category==fcNormal || category==fcNaN)
+ newPart = significandParts()[0];
+ freeSignificand();
+ significand.part = newPart;
}
+ // Now that we have the right storage, switch the semantics.
+ semantics = &toSemantics;
+
+ // If this is an extension, perform the shift now that the storage is
+ // available.
+ if (shift > 0 && (category==fcNormal || category==fcNaN))
+ APInt::tcShiftLeft(significandParts(), newPartCount, shift);
+
if (category == fcNormal) {
- /* Re-interpret our bit-pattern. */
- exponent += toSemantics.precision - semantics->precision;
- semantics = &toSemantics;
fs = normalize(rounding_mode, lostFraction);
*losesInfo = (fs != opOK);
} else if (category == fcNaN) {
- int shift = toSemantics.precision - semantics->precision;
- // Do this now so significandParts gets the right answer
- const fltSemantics *oldSemantics = semantics;
- semantics = &toSemantics;
- *losesInfo = false;
- // No normalization here, just truncate
- if (shift>0)
- APInt::tcShiftLeft(significandParts(), newPartCount, shift);
- else if (shift < 0) {
- unsigned ushift = -shift;
- // Figure out if we are losing information. This happens
- // if are shifting out something other than 0s, or if the x87 long
- // double input did not have its integer bit set (pseudo-NaN), or if the
- // x87 long double input did not have its QNan bit set (because the x87
- // hardware sets this bit when converting a lower-precision NaN to
- // x87 long double).
- if (APInt::tcLSB(significandParts(), newPartCount) < ushift)
- *losesInfo = true;
- if (oldSemantics == &APFloat::x87DoubleExtended &&
- (!(*significandParts() & 0x8000000000000000ULL) ||
- !(*significandParts() & 0x4000000000000000ULL)))
- *losesInfo = true;
- APInt::tcShiftRight(significandParts(), newPartCount, ushift);
- }
+ *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
// gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
// does not give you back the same bits. This is dubious, and we
// don't currently do it. You're really supposed to get
// an invalid operation signal at runtime, but nobody does that.
fs = opOK;
} else {
- semantics = &toSemantics;
- fs = opOK;
*losesInfo = false;
+ fs = opOK;
}
return fs;
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 55cb433..506225f 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -1440,15 +1440,11 @@ APInt APInt::sqrt() const {
APInt nextSquare((x_old + 1) * (x_old +1));
if (this->ult(square))
return x_old;
- else if (this->ule(nextSquare)) {
- APInt midpoint((nextSquare - square).udiv(two));
- APInt offset(*this - square);
- if (offset.ult(midpoint))
- return x_old;
- else
- return x_old + 1;
- } else
- llvm_unreachable("Error in APInt::sqrt computation");
+ assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
+ APInt midpoint((nextSquare - square).udiv(two));
+ APInt offset(*this - square);
+ if (offset.ult(midpoint))
+ return x_old;
return x_old + 1;
}
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 4b43ae9..ce93449 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -57,6 +57,9 @@ TEMPLATE_INSTANTIATION(class opt<char>);
TEMPLATE_INSTANTIATION(class opt<bool>);
} } // end namespace llvm::cl
+void GenericOptionValue::anchor() {}
+void OptionValue<boolOrDefault>::anchor() {}
+void OptionValue<std::string>::anchor() {}
void Option::anchor() {}
void basic_parser_impl::anchor() {}
void parser<bool>::anchor() {}
diff --git a/lib/Support/DAGDeltaAlgorithm.cpp b/lib/Support/DAGDeltaAlgorithm.cpp
index 8145664..1e89c6a 100644
--- a/lib/Support/DAGDeltaAlgorithm.cpp
+++ b/lib/Support/DAGDeltaAlgorithm.cpp
@@ -350,6 +350,9 @@ DAGDeltaAlgorithmImpl::Run() {
return Required;
}
+void DAGDeltaAlgorithm::anchor() {
+}
+
DAGDeltaAlgorithm::changeset_ty
DAGDeltaAlgorithm::Run(const changeset_ty &Changes,
const std::vector<edge_ty> &Dependencies) {
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index a19e4b4..86d1c5d 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -298,6 +298,8 @@ std::string sys::getHostCPUName() {
}
case 16:
return "amdfam10";
+ case 21:
+ return "bdver1";
default:
return "generic";
}
diff --git a/lib/Support/LLVMBuild.txt b/lib/Support/LLVMBuild.txt
index f32ef8f..5b88be0 100644
--- a/lib/Support/LLVMBuild.txt
+++ b/lib/Support/LLVMBuild.txt
@@ -19,4 +19,3 @@
type = Library
name = Support
parent = Libraries
-
diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp
index 8874e94..6a873cb 100644
--- a/lib/Support/Mutex.cpp
+++ b/lib/Support/Mutex.cpp
@@ -19,7 +19,7 @@
//=== independent code.
//===----------------------------------------------------------------------===//
-#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
// Define all methods as no-ops if threading is explicitly disabled
namespace llvm {
using namespace sys;
diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp
index a4d49dc..dcddeda 100644
--- a/lib/Support/Path.cpp
+++ b/lib/Support/Path.cpp
@@ -90,7 +90,7 @@ sys::IdentifyFileType(const char *magic, unsigned length) {
case 0xCF: {
uint16_t type = 0;
if (magic[0] == char(0xFE) && magic[1] == char(0xED) &&
- magic[2] == char(0xFA) &&
+ magic[2] == char(0xFA) &&
(magic[3] == char(0xCE) || magic[3] == char(0xCF))) {
/* Native endian */
if (length >= 16) type = magic[14] << 8 | magic[15];
@@ -152,31 +152,31 @@ sys::IdentifyFileType(const char *magic, unsigned length) {
bool
Path::isArchive() const {
- LLVMFileType type;
+ fs::file_magic type;
if (fs::identify_magic(str(), type))
return false;
- return type == Archive_FileType;
+ return type == fs::file_magic::archive;
}
bool
Path::isDynamicLibrary() const {
- LLVMFileType type;
+ fs::file_magic type;
if (fs::identify_magic(str(), type))
return false;
switch (type) {
default: return false;
- case Mach_O_FixedVirtualMemorySharedLib_FileType:
- case Mach_O_DynamicallyLinkedSharedLib_FileType:
- case Mach_O_DynamicallyLinkedSharedLibStub_FileType:
- case ELF_SharedObject_FileType:
- case COFF_FileType: return true;
+ case fs::file_magic::macho_fixed_virtual_memory_shared_lib:
+ case fs::file_magic::macho_dynamically_linked_shared_lib:
+ case fs::file_magic::macho_dynamically_linked_shared_lib_stub:
+ case fs::file_magic::elf_shared_object:
+ case fs::file_magic::pecoff_executable: return true;
}
}
bool
Path::isObjectFile() const {
- LLVMFileType type;
- if (fs::identify_magic(str(), type) || type == Unknown_FileType)
+ fs::file_magic type;
+ if (fs::identify_magic(str(), type) || type == fs::file_magic::unknown)
return false;
return true;
}
@@ -212,10 +212,10 @@ Path::appendSuffix(StringRef suffix) {
bool
Path::isBitcodeFile() const {
- LLVMFileType type;
+ fs::file_magic type;
if (fs::identify_magic(str(), type))
return false;
- return type == Bitcode_FileType;
+ return type == fs::file_magic::bitcode;
}
bool Path::hasMagicNumber(StringRef Magic) const {
diff --git a/lib/Support/PathV2.cpp b/lib/Support/PathV2.cpp
index bebe442..7cc434b 100644
--- a/lib/Support/PathV2.cpp
+++ b/lib/Support/PathV2.cpp
@@ -13,6 +13,7 @@
#include "llvm/Support/PathV2.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include <cctype>
#include <cstdio>
@@ -492,7 +493,7 @@ bool is_separator(char value) {
void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result) {
result.clear();
-
+
// Check whether the temporary directory is specified by an environment
// variable.
const char *EnvironmentVariable;
@@ -505,7 +506,7 @@ void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result) {
result.append(RequestedDir, RequestedDir + strlen(RequestedDir));
return;
}
-
+
// Fall back to a system default.
const char *DefaultResult;
#ifdef LLVM_ON_WIN32
@@ -519,7 +520,7 @@ void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result) {
#endif
result.append(DefaultResult, DefaultResult + strlen(DefaultResult));
}
-
+
bool has_root_name(const Twine &path) {
SmallString<128> path_storage;
StringRef p = path.toStringRef(path_storage);
@@ -738,13 +739,124 @@ error_code has_magic(const Twine &path, const Twine &magic, bool &result) {
return success;
}
-error_code identify_magic(const Twine &path, LLVMFileType &result) {
+/// @brief Identify the type of file from the given magic number string.
+file_magic identify_magic(StringRef magic) {
+ switch ((unsigned char)magic[0]) {
+ case 0xDE: // 0x0B17C0DE = BC wrapper
+ if (magic[1] == (char)0xC0 && magic[2] == (char)0x17 &&
+ magic[3] == (char)0x0B)
+ return file_magic::bitcode;
+ break;
+ case 'B':
+ if (magic[1] == 'C' && magic[2] == (char)0xC0 && magic[3] == (char)0xDE)
+ return file_magic::bitcode;
+ break;
+ case '!':
+ if (magic.size() >= 8)
+ if (memcmp(magic.data(),"!<arch>\n",8) == 0)
+ return file_magic::archive;
+ break;
+
+ case '\177':
+ if (magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F') {
+ if (magic.size() >= 18 && magic[17] == 0)
+ switch (magic[16]) {
+ default: break;
+ case 1: return file_magic::elf_relocatable;
+ case 2: return file_magic::elf_executable;
+ case 3: return file_magic::elf_shared_object;
+ case 4: return file_magic::elf_core;
+ }
+ }
+ break;
+
+ case 0xCA:
+ if (magic[1] == char(0xFE) && magic[2] == char(0xBA) &&
+ magic[3] == char(0xBE)) {
+ // This is complicated by an overlap with Java class files.
+ // See the Mach-O section in /usr/share/file/magic for details.
+ if (magic.size() >= 8 && magic[7] < 43)
+ // FIXME: Universal Binary of any type.
+ return file_magic::macho_dynamically_linked_shared_lib;
+ }
+ break;
+
+ // The two magic numbers for mach-o are:
+ // 0xfeedface - 32-bit mach-o
+ // 0xfeedfacf - 64-bit mach-o
+ case 0xFE:
+ case 0xCE:
+ case 0xCF: {
+ uint16_t type = 0;
+ if (magic[0] == char(0xFE) && magic[1] == char(0xED) &&
+ magic[2] == char(0xFA) &&
+ (magic[3] == char(0xCE) || magic[3] == char(0xCF))) {
+ /* Native endian */
+ if (magic.size() >= 16) type = magic[14] << 8 | magic[15];
+ } else if ((magic[0] == char(0xCE) || magic[0] == char(0xCF)) &&
+ magic[1] == char(0xFA) && magic[2] == char(0xED) &&
+ magic[3] == char(0xFE)) {
+ /* Reverse endian */
+ if (magic.size() >= 14) type = magic[13] << 8 | magic[12];
+ }
+ switch (type) {
+ default: break;
+ case 1: return file_magic::macho_object;
+ case 2: return file_magic::macho_executable;
+ case 3: return file_magic::macho_fixed_virtual_memory_shared_lib;
+ case 4: return file_magic::macho_core;
+ case 5: return file_magic::macho_preload_executabl;
+ case 6: return file_magic::macho_dynamically_linked_shared_lib;
+ case 7: return file_magic::macho_dynamic_linker;
+ case 8: return file_magic::macho_bundle;
+ case 9: return file_magic::macho_dynamic_linker;
+ case 10: return file_magic::macho_dsym_companion;
+ }
+ break;
+ }
+ case 0xF0: // PowerPC Windows
+ case 0x83: // Alpha 32-bit
+ case 0x84: // Alpha 64-bit
+ case 0x66: // MPS R4000 Windows
+ case 0x50: // mc68K
+ case 0x4c: // 80386 Windows
+ if (magic[1] == 0x01)
+ return file_magic::coff_object;
+
+ case 0x90: // PA-RISC Windows
+ case 0x68: // mc68K Windows
+ if (magic[1] == 0x02)
+ return file_magic::coff_object;
+ break;
+
+ case 0x4d: // Possible MS-DOS stub on Windows PE file
+ if (magic[1] == 0x5a) {
+ uint32_t off =
+ *reinterpret_cast<const support::ulittle32_t*>(magic.data() + 0x3c);
+ // PE/COFF file, either EXE or DLL.
+ if (off < magic.size() && memcmp(magic.data() + off, "PE\0\0",4) == 0)
+ return file_magic::pecoff_executable;
+ }
+ break;
+
+ case 0x64: // x86-64 Windows.
+ if (magic[1] == char(0x86))
+ return file_magic::coff_object;
+ break;
+
+ default:
+ break;
+ }
+ return file_magic::unknown;
+}
+
+error_code identify_magic(const Twine &path, file_magic &result) {
SmallString<32> Magic;
error_code ec = get_magic(path, Magic.capacity(), Magic);
if (ec && ec != errc::value_too_large)
return ec;
- result = IdentifyFileType(Magic.data(), Magic.size());
+ result = identify_magic(Magic);
return success;
}
@@ -753,7 +865,9 @@ error_code remove_all_r(StringRef path, file_type ft, uint32_t &count) {
if (ft == file_type::directory_file) {
// This code would be a lot better with exceptions ;/.
error_code ec;
- for (directory_iterator i(path, ec), e; i != e; i.increment(ec)) {
+ directory_iterator i(path, ec);
+ if (ec) return ec;
+ for (directory_iterator e; i != e; i.increment(ec)) {
if (ec) return ec;
file_status st;
if (error_code ec = i->status(st)) return ec;
diff --git a/lib/Support/Program.cpp b/lib/Support/Program.cpp
index 01860b0..75bc282 100644
--- a/lib/Support/Program.cpp
+++ b/lib/Support/Program.cpp
@@ -13,6 +13,7 @@
#include "llvm/Support/Program.h"
#include "llvm/Config/config.h"
+#include "llvm/Support/system_error.h"
using namespace llvm;
using namespace sys;
diff --git a/lib/Support/RWMutex.cpp b/lib/Support/RWMutex.cpp
index d0b1e10..d14b976 100644
--- a/lib/Support/RWMutex.cpp
+++ b/lib/Support/RWMutex.cpp
@@ -20,7 +20,7 @@
//=== independent code.
//===----------------------------------------------------------------------===//
-#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
// Define all methods as no-ops if threading is explicitly disabled
namespace llvm {
using namespace sys;
diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp
index 04a44a0..d8a6ad3 100644
--- a/lib/Support/Statistic.cpp
+++ b/lib/Support/Statistic.cpp
@@ -73,9 +73,12 @@ void Statistic::RegisterStatistic() {
if (Enabled)
StatInfo->addStatistic(this);
+ TsanHappensBefore(this);
sys::MemoryFence();
// Remember we have been registered.
+ TsanIgnoreWritesBegin();
Initialized = true;
+ TsanIgnoreWritesEnd();
}
}
diff --git a/lib/Support/ThreadLocal.cpp b/lib/Support/ThreadLocal.cpp
index fdb251c..08b12b6 100644
--- a/lib/Support/ThreadLocal.cpp
+++ b/lib/Support/ThreadLocal.cpp
@@ -19,7 +19,7 @@
//=== independent code.
//===----------------------------------------------------------------------===//
-#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
// Define all methods as no-ops if threading is explicitly disabled
namespace llvm {
using namespace sys;
diff --git a/lib/Support/Threading.cpp b/lib/Support/Threading.cpp
index 8f0bb93..7483225 100644
--- a/lib/Support/Threading.cpp
+++ b/lib/Support/Threading.cpp
@@ -24,7 +24,7 @@ static bool multithreaded_mode = false;
static sys::Mutex* global_lock = 0;
bool llvm::llvm_start_multithreaded() {
-#if ENABLE_THREADS != 0
+#if LLVM_ENABLE_THREADS != 0
assert(!multithreaded_mode && "Already multithreaded!");
multithreaded_mode = true;
global_lock = new sys::Mutex(true);
@@ -39,7 +39,7 @@ bool llvm::llvm_start_multithreaded() {
}
void llvm::llvm_stop_multithreaded() {
-#if ENABLE_THREADS != 0
+#if LLVM_ENABLE_THREADS != 0
assert(multithreaded_mode && "Not currently multithreaded!");
// We fence here to insure that all threaded operations are complete BEFORE we
@@ -63,7 +63,7 @@ void llvm::llvm_release_global_lock() {
if (multithreaded_mode) global_lock->release();
}
-#if ENABLE_THREADS != 0 && defined(HAVE_PTHREAD_H)
+#if LLVM_ENABLE_THREADS != 0 && defined(HAVE_PTHREAD_H)
#include <pthread.h>
struct ThreadInfo {
@@ -102,7 +102,7 @@ void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData,
error:
::pthread_attr_destroy(&Attr);
}
-#elif ENABLE_THREADS!=0 && defined(LLVM_ON_WIN32)
+#elif LLVM_ENABLE_THREADS!=0 && defined(LLVM_ON_WIN32)
#include "Windows/Windows.h"
#include <process.h>
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index ac4f005..8f58e70 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -20,6 +20,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case arm: return "arm";
case cellspu: return "cellspu";
+ case hexagon: return "hexagon";
case mips: return "mips";
case mipsel: return "mipsel";
case mips64: return "mips64";
@@ -59,6 +60,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
case mblaze: return "mblaze";
+ case hexagon: return "hexagon";
+
case sparcv9:
case sparc: return "sparc";
@@ -150,6 +153,8 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
return ppc;
if (Name == "mblaze")
return mblaze;
+ if (Name == "hexagon")
+ return hexagon;
if (Name == "sparc")
return sparc;
if (Name == "sparcv9")
@@ -295,6 +300,8 @@ Triple::ArchType Triple::ParseArch(StringRef ArchName) {
return mips64;
else if (ArchName == "mips64el")
return mips64el;
+ else if (ArchName == "hexagon")
+ return hexagon;
else if (ArchName == "sparc")
return sparc;
else if (ArchName == "sparcv9")
diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc
index b5488de..aebb4ab 100644
--- a/lib/Support/Unix/PathV2.inc
+++ b/lib/Support/Unix/PathV2.inc
@@ -275,28 +275,17 @@ error_code exists(const Twine &path, bool &result) {
return success;
}
-error_code equivalent(const Twine &A, const Twine &B, bool &result) {
- // Get arguments.
- SmallString<128> a_storage;
- SmallString<128> b_storage;
- StringRef a = A.toNullTerminatedStringRef(a_storage);
- StringRef b = B.toNullTerminatedStringRef(b_storage);
-
- struct stat stat_a, stat_b;
- int error_b = ::stat(b.begin(), &stat_b);
- int error_a = ::stat(a.begin(), &stat_a);
-
- // If both are invalid, it's an error. If only one is, the result is false.
- if (error_a != 0 || error_b != 0) {
- if (error_a == error_b)
- return error_code(errno, system_category());
- result = false;
- } else {
- result =
- stat_a.st_dev == stat_b.st_dev &&
- stat_a.st_ino == stat_b.st_ino;
- }
+bool equivalent(file_status A, file_status B) {
+ assert(status_known(A) && status_known(B));
+ return A.st_dev == B.st_dev &&
+ A.st_ino == B.st_ino;
+}
+error_code equivalent(const Twine &A, const Twine &B, bool &result) {
+ file_status fsA, fsB;
+ if (error_code ec = status(A, fsA)) return ec;
+ if (error_code ec = status(B, fsB)) return ec;
+ result = equivalent(fsA, fsB);
return success;
}
@@ -343,6 +332,9 @@ error_code status(const Twine &path, file_status &result) {
else
result = file_status(file_type::type_unknown);
+ result.st_dev = status.st_dev;
+ result.st_ino = status.st_ino;
+
return success;
}
@@ -441,7 +433,8 @@ rety_open_create:
return success;
}
-error_code directory_iterator_construct(directory_iterator &it, StringRef path){
+error_code detail::directory_iterator_construct(detail::DirIterState &it,
+ StringRef path){
SmallString<128> path_null(path);
DIR *directory = ::opendir(path_null.c_str());
if (directory == 0)
@@ -454,7 +447,7 @@ error_code directory_iterator_construct(directory_iterator &it, StringRef path){
return directory_iterator_increment(it);
}
-error_code directory_iterator_destruct(directory_iterator& it) {
+error_code detail::directory_iterator_destruct(detail::DirIterState &it) {
if (it.IterationHandle)
::closedir(reinterpret_cast<DIR *>(it.IterationHandle));
it.IterationHandle = 0;
@@ -462,7 +455,7 @@ error_code directory_iterator_destruct(directory_iterator& it) {
return success;
}
-error_code directory_iterator_increment(directory_iterator& it) {
+error_code detail::directory_iterator_increment(detail::DirIterState &it) {
errno = 0;
dirent *cur_dir = ::readdir(reinterpret_cast<DIR *>(it.IterationHandle));
if (cur_dir == 0 && errno != 0) {
diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc
index 346baf1..e5990d0 100644
--- a/lib/Support/Unix/Program.inc
+++ b/lib/Support/Unix/Program.inc
@@ -412,19 +412,19 @@ Program::Kill(std::string* ErrMsg) {
return false;
}
-bool Program::ChangeStdinToBinary(){
+error_code Program::ChangeStdinToBinary(){
// Do nothing, as Unix doesn't differentiate between text and binary.
- return false;
+ return make_error_code(errc::success);
}
-bool Program::ChangeStdoutToBinary(){
+error_code Program::ChangeStdoutToBinary(){
// Do nothing, as Unix doesn't differentiate between text and binary.
- return false;
+ return make_error_code(errc::success);
}
-bool Program::ChangeStderrToBinary(){
+error_code Program::ChangeStderrToBinary(){
// Do nothing, as Unix doesn't differentiate between text and binary.
- return false;
+ return make_error_code(errc::success);
}
}
diff --git a/lib/Support/Valgrind.cpp b/lib/Support/Valgrind.cpp
index 078d705..2b250a3 100644
--- a/lib/Support/Valgrind.cpp
+++ b/lib/Support/Valgrind.cpp
@@ -53,6 +53,7 @@ void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) {
#endif // !HAVE_VALGRIND_VALGRIND_H
+#if LLVM_ENABLE_THREADS != 0 && !defined(NDEBUG)
// These functions require no implementation, tsan just looks at the arguments
// they're called with.
extern "C" {
@@ -63,3 +64,4 @@ void AnnotateHappensAfter(const char *file, int line,
void AnnotateIgnoreWritesBegin(const char *file, int line) {}
void AnnotateIgnoreWritesEnd(const char *file, int line) {}
}
+#endif
diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc
index bc597b2..7ca33c0 100644
--- a/lib/Support/Windows/PathV2.inc
+++ b/lib/Support/Windows/PathV2.inc
@@ -17,7 +17,6 @@
//===----------------------------------------------------------------------===//
#include "Windows.h"
-#include <wincrypt.h>
#include <fcntl.h>
#include <io.h>
#include <sys/stat.h>
@@ -112,14 +111,6 @@ namespace {
return success;
}
- // Forwarder for ScopedHandle.
- BOOL WINAPI CryptReleaseContext(HCRYPTPROV Provider) {
- return ::CryptReleaseContext(Provider, 0);
- }
-
- typedef ScopedHandle<HCRYPTPROV, uintptr_t(-1),
- BOOL (WINAPI*)(HCRYPTPROV), CryptReleaseContext>
- ScopedCryptContext;
bool is_separator(const wchar_t value) {
switch (value) {
case L'\\':
@@ -359,68 +350,22 @@ error_code exists(const Twine &path, bool &result) {
return success;
}
-error_code equivalent(const Twine &A, const Twine &B, bool &result) {
- // Get arguments.
- SmallString<128> a_storage;
- SmallString<128> b_storage;
- StringRef a = A.toStringRef(a_storage);
- StringRef b = B.toStringRef(b_storage);
-
- // Convert to utf-16.
- SmallVector<wchar_t, 128> wide_a;
- SmallVector<wchar_t, 128> wide_b;
- if (error_code ec = UTF8ToUTF16(a, wide_a)) return ec;
- if (error_code ec = UTF8ToUTF16(b, wide_b)) return ec;
-
- AutoHandle HandleB(
- ::CreateFileW(wide_b.begin(),
- 0,
- FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
- 0,
- OPEN_EXISTING,
- FILE_FLAG_BACKUP_SEMANTICS,
- 0));
-
- AutoHandle HandleA(
- ::CreateFileW(wide_a.begin(),
- 0,
- FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
- 0,
- OPEN_EXISTING,
- FILE_FLAG_BACKUP_SEMANTICS,
- 0));
-
- // If both handles are invalid, it's an error.
- if (HandleA == INVALID_HANDLE_VALUE &&
- HandleB == INVALID_HANDLE_VALUE)
- return windows_error(::GetLastError());
-
- // If only one is invalid, it's false.
- if (HandleA == INVALID_HANDLE_VALUE &&
- HandleB == INVALID_HANDLE_VALUE) {
- result = false;
- return success;
- }
-
- // Get file information.
- BY_HANDLE_FILE_INFORMATION InfoA, InfoB;
- if (!::GetFileInformationByHandle(HandleA, &InfoA))
- return windows_error(::GetLastError());
- if (!::GetFileInformationByHandle(HandleB, &InfoB))
- return windows_error(::GetLastError());
-
- // See if it's all the same.
- result =
- InfoA.dwVolumeSerialNumber == InfoB.dwVolumeSerialNumber &&
- InfoA.nFileIndexHigh == InfoB.nFileIndexHigh &&
- InfoA.nFileIndexLow == InfoB.nFileIndexLow &&
- InfoA.nFileSizeHigh == InfoB.nFileSizeHigh &&
- InfoA.nFileSizeLow == InfoB.nFileSizeLow &&
- InfoA.ftLastWriteTime.dwLowDateTime ==
- InfoB.ftLastWriteTime.dwLowDateTime &&
- InfoA.ftLastWriteTime.dwHighDateTime ==
- InfoB.ftLastWriteTime.dwHighDateTime;
+bool equivalent(file_status A, file_status B) {
+ assert(status_known(A) && status_known(B));
+ return A.FileIndexHigh == B.FileIndexHigh &&
+ A.FileIndexLow == B.FileIndexLow &&
+ A.FileSizeHigh == B.FileSizeHigh &&
+ A.FileSizeLow == B.FileSizeLow &&
+ A.LastWriteTimeHigh == B.LastWriteTimeHigh &&
+ A.LastWriteTimeLow == B.LastWriteTimeLow &&
+ A.VolumeSerialNumber == B.VolumeSerialNumber;
+}
+error_code equivalent(const Twine &A, const Twine &B, bool &result) {
+ file_status fsA, fsB;
+ if (error_code ec = status(A, fsA)) return ec;
+ if (error_code ec = status(B, fsB)) return ec;
+ result = equivalent(fsA, fsB);
return success;
}
@@ -478,8 +423,7 @@ error_code status(const Twine &path, file_status &result) {
return success;
}
- if (error_code ec = UTF8ToUTF16(path8,
- path_utf16))
+ if (error_code ec = UTF8ToUTF16(path8, path_utf16))
return ec;
DWORD attr = ::GetFileAttributesW(path_utf16.begin());
@@ -488,7 +432,7 @@ error_code status(const Twine &path, file_status &result) {
// Handle reparse points.
if (attr & FILE_ATTRIBUTE_REPARSE_POINT) {
- AutoHandle h(
+ ScopedFileHandle h(
::CreateFileW(path_utf16.begin(),
0, // Attributes only.
FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
@@ -496,14 +440,35 @@ error_code status(const Twine &path, file_status &result) {
OPEN_EXISTING,
FILE_FLAG_BACKUP_SEMANTICS,
0));
- if (h == INVALID_HANDLE_VALUE)
+ if (!h)
goto handle_status_error;
}
if (attr & FILE_ATTRIBUTE_DIRECTORY)
result = file_status(file_type::directory_file);
- else
+ else {
result = file_status(file_type::regular_file);
+ ScopedFileHandle h(
+ ::CreateFileW(path_utf16.begin(),
+ 0, // Attributes only.
+ FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
+ NULL,
+ OPEN_EXISTING,
+ FILE_FLAG_BACKUP_SEMANTICS,
+ 0));
+ if (!h)
+ goto handle_status_error;
+ BY_HANDLE_FILE_INFORMATION Info;
+ if (!::GetFileInformationByHandle(h, &Info))
+ goto handle_status_error;
+ result.FileIndexHigh = Info.nFileIndexHigh;
+ result.FileIndexLow = Info.nFileIndexLow;
+ result.FileSizeHigh = Info.nFileSizeHigh;
+ result.FileSizeLow = Info.nFileSizeLow;
+ result.LastWriteTimeHigh = Info.ftLastWriteTime.dwHighDateTime;
+ result.LastWriteTimeLow = Info.ftLastWriteTime.dwLowDateTime;
+ result.VolumeSerialNumber = Info.dwVolumeSerialNumber;
+ }
return success;
@@ -535,7 +500,7 @@ error_code unique_file(const Twine &model, int &result_fd,
if (makeAbsolute) {
// Make model absolute by prepending a temp directory if it's not already.
bool absolute = path::is_absolute(m);
-
+
if (!absolute) {
SmallVector<wchar_t, 64> temp_dir;
if (error_code ec = TempDir(temp_dir)) return ec;
@@ -691,7 +656,8 @@ error_code get_magic(const Twine &path, uint32_t len,
return success;
}
-error_code directory_iterator_construct(directory_iterator &it, StringRef path){
+error_code detail::directory_iterator_construct(detail::DirIterState &it,
+ StringRef path){
SmallVector<wchar_t, 128> path_utf16;
if (error_code ec = UTF8ToUTF16(path,
@@ -722,7 +688,7 @@ error_code directory_iterator_construct(directory_iterator &it, StringRef path){
error_code ec = windows_error(::GetLastError());
// Check for end.
if (ec == windows_error::no_more_files)
- return directory_iterator_destruct(it);
+ return detail::directory_iterator_destruct(it);
return ec;
} else
FilenameLen = ::wcslen(FirstFind.cFileName);
@@ -742,7 +708,7 @@ error_code directory_iterator_construct(directory_iterator &it, StringRef path){
return success;
}
-error_code directory_iterator_destruct(directory_iterator& it) {
+error_code detail::directory_iterator_destruct(detail::DirIterState &it) {
if (it.IterationHandle != 0)
// Closes the handle if it's valid.
ScopedFindHandle close(HANDLE(it.IterationHandle));
@@ -751,13 +717,13 @@ error_code directory_iterator_destruct(directory_iterator& it) {
return success;
}
-error_code directory_iterator_increment(directory_iterator& it) {
+error_code detail::directory_iterator_increment(detail::DirIterState &it) {
WIN32_FIND_DATAW FindData;
if (!::FindNextFileW(HANDLE(it.IterationHandle), &FindData)) {
error_code ec = windows_error(::GetLastError());
// Check for end.
if (ec == windows_error::no_more_files)
- return directory_iterator_destruct(it);
+ return detail::directory_iterator_destruct(it);
return ec;
}
diff --git a/lib/Support/Windows/Program.inc b/lib/Support/Windows/Program.inc
index 7e38168..80ccaa6 100644
--- a/lib/Support/Windows/Program.inc
+++ b/lib/Support/Windows/Program.inc
@@ -299,14 +299,14 @@ Program::Execute(const Path& path,
Data_ = wpi;
// Make sure these get closed no matter what.
- AutoHandle hThread(pi.hThread);
+ ScopedCommonHandle hThread(pi.hThread);
// Assign the process to a job if a memory limit is defined.
- AutoHandle hJob(0);
+ ScopedJobHandle hJob;
if (memoryLimit != 0) {
hJob = CreateJobObject(0, 0);
bool success = false;
- if (hJob != 0) {
+ if (hJob) {
JOBOBJECT_EXTENDED_LIMIT_INFORMATION jeli;
memset(&jeli, 0, sizeof(jeli));
jeli.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_PROCESS_MEMORY;
@@ -367,7 +367,17 @@ Program::Wait(const Path &path,
return -2;
}
- return status & 0377;
+ if (!status)
+ return 0;
+
+ // Pass severity 10 (Warning) and 11 (Error) NTSTATUS codes back to the caller as a negative value.
+ if ((status & 0xBFFF0000U) == 0x80000000U)
+ return (int)status;
+
+ if (status & 0xFF)
+ return status & 0x7FFFFFFF;
+
+ return 1;
}
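Concretely (illustrative values): a child killed by an access violation reports status 0xC0000005, and since (0xC0000005 & 0xBFFF0000) == 0x80000000 that NTSTATUS value is returned as the negative int -1073741819; a child that calls exit(3) reports status 3, which passes the (status & 0xFF) test and is returned as 3; and exit(0) still yields 0.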
bool
@@ -387,19 +397,25 @@ Program::Kill(std::string* ErrMsg) {
return false;
}
-bool Program::ChangeStdinToBinary(){
+error_code Program::ChangeStdinToBinary(){
int result = _setmode( _fileno(stdin), _O_BINARY );
- return result == -1;
+ if (result == -1)
+ return error_code(errno, generic_category());
+ return make_error_code(errc::success);
}
-bool Program::ChangeStdoutToBinary(){
+error_code Program::ChangeStdoutToBinary(){
int result = _setmode( _fileno(stdout), _O_BINARY );
- return result == -1;
+ if (result == -1)
+ return error_code(errno, generic_category());
+ return make_error_code(errc::success);
}
-bool Program::ChangeStderrToBinary(){
+error_code Program::ChangeStderrToBinary(){
int result = _setmode( _fileno(stderr), _O_BINARY );
- return result == -1;
+ if (result == -1)
+ return error_code(errno, generic_category());
+ return make_error_code(errc::success);
}
}
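Callers that used to test the old bool return now check the returned error_code instead. A hedged sketch of the new calling convention; the helper below is hypothetical and assumes the static sys::Program methods declared in llvm/Support/Program.h:

  #include "llvm/Support/Program.h"
  #include "llvm/Support/raw_ostream.h"
  #include "llvm/Support/system_error.h"
  using namespace llvm;

  static bool setBinaryStdout() {
    if (error_code EC = sys::Program::ChangeStdoutToBinary()) {
      errs() << "cannot put stdout into binary mode: " << EC.message() << "\n";
      return false;
    }
    return true;
  }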
diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc
index 0d4b8a2..3a7e90b 100644
--- a/lib/Support/Windows/Signals.inc
+++ b/lib/Support/Windows/Signals.inc
@@ -446,7 +446,7 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
}
if (ExitOnUnhandledExceptions)
- _exit(-3);
+ _exit(ep->ExceptionRecord->ExceptionCode);
// Allow dialog box to pop up allowing choice to start debugger.
if (OldFilter)
diff --git a/lib/Support/Windows/Windows.h b/lib/Support/Windows/Windows.h
index 67b6f01..5c1da0d 100644
--- a/lib/Support/Windows/Windows.h
+++ b/lib/Support/Windows/Windows.h
@@ -26,6 +26,7 @@
#include "llvm/Config/config.h" // Get build system configuration settings
#include <windows.h>
+#include <wincrypt.h>
#include <shlobj.h>
#include <cassert>
#include <string>
@@ -41,70 +42,99 @@ inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) {
return true;
}
-class AutoHandle {
- HANDLE handle;
+template <typename HandleTraits>
+class ScopedHandle {
+ typedef typename HandleTraits::handle_type handle_type;
+ handle_type Handle;
+ ScopedHandle(const ScopedHandle &other); // = delete;
+ void operator=(const ScopedHandle &other); // = delete;
public:
- AutoHandle(HANDLE h) : handle(h) {}
+ ScopedHandle()
+ : Handle(HandleTraits::GetInvalid()) {}
+
+ explicit ScopedHandle(handle_type h)
+ : Handle(h) {}
- ~AutoHandle() {
- if (handle)
- CloseHandle(handle);
+ ~ScopedHandle() {
+ if (HandleTraits::IsValid(Handle))
+ HandleTraits::Close(Handle);
}
- operator HANDLE() {
- return handle;
+ handle_type take() {
+ handle_type t = Handle;
+ Handle = HandleTraits::GetInvalid();
+ return t;
}
- AutoHandle &operator=(HANDLE h) {
- handle = h;
+ ScopedHandle &operator=(handle_type h) {
+ if (HandleTraits::IsValid(Handle))
+ HandleTraits::Close(Handle);
+ Handle = h;
return *this;
}
+
+ // True if Handle is valid.
+ operator bool() const {
+ return HandleTraits::IsValid(Handle) ? true : false;
+ }
+
+ operator handle_type() const {
+ return Handle;
+ }
};
-template <class HandleType, uintptr_t InvalidHandle,
- class DeleterType, DeleterType D>
-class ScopedHandle {
- HandleType Handle;
+struct CommonHandleTraits {
+ typedef HANDLE handle_type;
-public:
- ScopedHandle() : Handle(InvalidHandle) {}
- ScopedHandle(HandleType handle) : Handle(handle) {}
+ static handle_type GetInvalid() {
+ return INVALID_HANDLE_VALUE;
+ }
- ~ScopedHandle() {
- if (Handle != HandleType(InvalidHandle))
- D(Handle);
+ static void Close(handle_type h) {
+ ::CloseHandle(h);
}
- HandleType take() {
- HandleType temp = Handle;
- Handle = HandleType(InvalidHandle);
- return temp;
+ static bool IsValid(handle_type h) {
+ return h != GetInvalid();
}
+};
- operator HandleType() const { return Handle; }
+struct JobHandleTraits : CommonHandleTraits {
+ static handle_type GetInvalid() {
+ return NULL;
+ }
+};
- ScopedHandle &operator=(HandleType handle) {
- Handle = handle;
- return *this;
+struct CryptContextTraits : CommonHandleTraits {
+ typedef HCRYPTPROV handle_type;
+
+ static handle_type GetInvalid() {
+ return 0;
}
- typedef void (*unspecified_bool_type)();
- static void unspecified_bool_true() {}
+ static void Close(handle_type h) {
+ ::CryptReleaseContext(h, 0);
+ }
- // True if Handle is valid.
- operator unspecified_bool_type() const {
- return Handle == HandleType(InvalidHandle) ? 0 : unspecified_bool_true;
+ static bool IsValid(handle_type h) {
+ return h != GetInvalid();
}
+};
- bool operator!() const {
- return Handle == HandleType(InvalidHandle);
+struct FindHandleTraits : CommonHandleTraits {
+ static void Close(handle_type h) {
+ ::FindClose(h);
}
};
-typedef ScopedHandle<HANDLE, uintptr_t(-1),
- BOOL (WINAPI*)(HANDLE), ::FindClose>
- ScopedFindHandle;
+struct FileHandleTraits : CommonHandleTraits {};
+
+typedef ScopedHandle<CommonHandleTraits> ScopedCommonHandle;
+typedef ScopedHandle<FileHandleTraits> ScopedFileHandle;
+typedef ScopedHandle<CryptContextTraits> ScopedCryptContext;
+typedef ScopedHandle<FindHandleTraits> ScopedFindHandle;
+typedef ScopedHandle<JobHandleTraits> ScopedJobHandle;
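The traits-parameterized ScopedHandle replaces both of the earlier wrappers, so supporting a new handle kind only takes a small traits struct plus a typedef. A hedged sketch of such an extension; RegKeyTraits and ScopedRegHandle are hypothetical and not part of this patch:

  // Hypothetical: an RAII wrapper for registry keys in the same style.
  struct RegKeyTraits : CommonHandleTraits {
    typedef HKEY handle_type;
    static handle_type GetInvalid() { return NULL; }
    static void Close(handle_type h) { ::RegCloseKey(h); }
    static bool IsValid(handle_type h) { return h != GetInvalid(); }
  };
  typedef ScopedHandle<RegKeyTraits> ScopedRegHandle;

  // Usage: the key is closed automatically when the wrapper goes out of scope.
  //   ScopedRegHandle Key(SomeOpenedKey);
  //   if (Key) { /* use HKEY(Key) ... */ }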
namespace llvm {
template <class T>
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 4927e9a..72d3986 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -20,6 +20,7 @@
#include "llvm/Config/config.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/system_error.h"
#include "llvm/ADT/STLExtras.h"
#include <cctype>
#include <cerrno>
diff --git a/lib/TableGen/CMakeLists.txt b/lib/TableGen/CMakeLists.txt
index 0db4134..e678087 100644
--- a/lib/TableGen/CMakeLists.txt
+++ b/lib/TableGen/CMakeLists.txt
@@ -10,7 +10,3 @@ add_llvm_library(LLVMTableGen
TGLexer.cpp
TGParser.cpp
)
-
-add_llvm_library_dependencies(LLVMTableGen
- LLVMSupport
- )
diff --git a/lib/TableGen/LLVMBuild.txt b/lib/TableGen/LLVMBuild.txt
index 4e24c37..54cedfd 100644
--- a/lib/TableGen/LLVMBuild.txt
+++ b/lib/TableGen/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = TableGen
parent = Libraries
required_libraries = Support
-
diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp
index c06add4..8bcb029 100644
--- a/lib/TableGen/TGParser.cpp
+++ b/lib/TableGen/TGParser.cpp
@@ -2219,6 +2219,8 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
Record *DefProto = MC->DefPrototypes[i];
Record *CurRec = InstantiateMulticlassDef(*MC, DefProto, DefmPrefix, DefmPrefixLoc);
+ if (!CurRec)
+ return true;
if (ResolveMulticlassDefArgs(*MC, CurRec, DefmPrefixLoc, SubClassLoc,
TArgs, TemplateVals, true/*Delete args*/))
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index bbca228..6ae287a 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -493,11 +493,21 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
return false;
}
- // These modifiers are not yet supported.
- case 'p': // The high single-precision register of a VFP double-precision
- // register.
case 'e': // The low doubleword register of a NEON quad register.
- case 'f': // The high doubleword register of a NEON quad register.
+ case 'f': { // The high doubleword register of a NEON quad register.
+ if (!MI->getOperand(OpNum).isReg())
+ return true;
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ if (!ARM::QPRRegClass.contains(Reg))
+ return true;
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ unsigned SubReg = TRI->getSubReg(Reg, ExtraCode[0] == 'e' ?
+ ARM::dsub_0 : ARM::dsub_1);
+ O << ARMInstPrinter::getRegisterName(SubReg);
+ return false;
+ }
+
+ // These modifiers are not yet supported.
case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1.
case 'H': // The highest-numbered register of a pair.
return true;
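The new 'e'/'f' cases let inline assembly name the low or high doubleword of a NEON quad-register operand instead of rejecting those modifiers. A hedged source-level sketch, assuming a GCC-style frontend that passes the modifiers through and the standard "w" NEON register constraint; whether a given compiler accepts this is outside the scope of the patch:

  #include <arm_neon.h>

  // %e1 prints the low D register of the quad operand In, %f1 the high one.
  // Only the low half of Out is written; this is purely illustrative.
  float32x4_t add_low_high(float32x4_t In) {
    float32x4_t Out;
    __asm__("vadd.f32 %e0, %e1, %f1" : "=w"(Out) : "w"(In));
    return Out;
  }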
@@ -739,14 +749,14 @@ void ARMAsmPrinter::emitAttributes() {
}
// Signal various FP modes.
- if (!UnsafeFPMath) {
+ if (!TM.Options.UnsafeFPMath) {
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_denormal,
ARMBuildAttrs::Allowed);
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
ARMBuildAttrs::Allowed);
}
- if (NoInfsFPMath && NoNaNsFPMath)
+ if (TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath)
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
ARMBuildAttrs::Allowed);
else
@@ -759,7 +769,7 @@ void ARMAsmPrinter::emitAttributes() {
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1);
// Hard float. Use both S and D registers and conform to AAPCS-VFP.
- if (Subtarget->isAAPCS_ABI() && FloatABIType == FloatABI::Hard) {
+ if (Subtarget->isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard) {
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_HardFP_use, 3);
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_VFP_args, 1);
}
@@ -1069,7 +1079,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
}
// Try to figure out the unwinding opcode out of src / dst regs.
- if (MI->getDesc().mayStore()) {
+ if (MI->mayStore()) {
// Register saves.
assert(DstReg == ARM::SP &&
"Only stack pointer as a destination reg is supported");
@@ -1481,11 +1491,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
/// in the function. The first operand is the ID# for this instruction, the
/// second is the index into the MachineConstantPool that this is, the third
/// is the size in bytes of this constant pool entry.
+ /// The required alignment is specified on the basic block holding this MI.
unsigned LabelId = (unsigned)MI->getOperand(0).getImm();
unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();
- EmitAlignment(2);
-
// Mark the constant pool entry as data if we're not already in a data
// region.
OutStreamer.EmitDataRegion();
@@ -1934,4 +1943,3 @@ extern "C" void LLVMInitializeARMAsmPrinter() {
RegisterAsmPrinter<ARMAsmPrinter> X(TheARMTarget);
RegisterAsmPrinter<ARMAsmPrinter> Y(TheThumbTarget);
}
-
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 9315348..8bf5475 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -146,7 +146,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
- bool isLoad = !MCID.mayStore();
+ bool isLoad = !MI->mayStore();
const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
const MachineOperand &Base = MI->getOperand(2);
const MachineOperand &Offset = MI->getOperand(NumOps-3);
@@ -439,6 +439,22 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
return false;
}
+bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
+ if (MI->isBundle()) {
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ int PIdx = I->findFirstPredOperandIdx();
+ if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
+ return true;
+ }
+ return false;
+ }
+
+ int PIdx = MI->findFirstPredOperandIdx();
+ return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
+}
+
bool ARMBaseInstrInfo::
PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const {
@@ -491,7 +507,7 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const {
// FIXME: This confuses implicit_def with optional CPSR def.
const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.getImplicitDefs() && !MCID.hasOptionalDef())
+ if (!MCID.getImplicitDefs() && !MI->hasOptionalDef())
return false;
bool Found = false;
@@ -510,11 +526,10 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
/// By default, this returns true for every instruction with a
/// PredicateOperand.
bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isPredicable())
+ if (!MI->isPredicable())
return false;
- if ((MCID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
+ if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
ARMFunctionInfo *AFI =
MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
return AFI->isThumb2Function();
@@ -548,7 +563,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
if (MI->isLabel())
return 0;
- unsigned Opc = MI->getOpcode();
+ unsigned Opc = MI->getOpcode();
switch (Opc) {
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
@@ -556,6 +571,8 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case TargetOpcode::EH_LABEL:
case TargetOpcode::DBG_VALUE:
return 0;
+ case TargetOpcode::BUNDLE:
+ return getInstBundleLength(MI);
case ARM::MOVi16_ga_pcrel:
case ARM::MOVTi16_ga_pcrel:
case ARM::t2MOVi16_ga_pcrel:
@@ -593,7 +610,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
unsigned NumOps = MCID.getNumOperands();
MachineOperand JTOP =
- MI->getOperand(NumOps - (MCID.isPredicable() ? 3 : 2));
+ MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2));
unsigned JTI = JTOP.getIndex();
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
assert(MJTI != 0);
@@ -622,6 +639,17 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
return 0; // Not reached
}
+unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
+ unsigned Size = 0;
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ assert(!I->isBundle() && "No nested bundle!");
+ Size += GetInstSizeInBytes(&*I);
+ }
+ return Size;
+}
+
void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
@@ -845,7 +873,7 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const {
const MachineMemOperand *Dummy;
- return MI->getDesc().mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
+ return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
}
void ARMBaseInstrInfo::
@@ -991,7 +1019,7 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const {
const MachineMemOperand *Dummy;
- return MI->getDesc().mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+ return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
}
bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
@@ -1357,7 +1385,7 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
return false;
// Terminators and labels can't be scheduled around.
- if (MI->getDesc().isTerminator() || MI->isLabel())
+ if (MI->isTerminator() || MI->isLabel())
return true;
// Treat the start of the IT block as a scheduling boundary, but schedule
@@ -1762,8 +1790,7 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
// Check that CPSR isn't set between the comparison instruction and the one we
// want to change.
- MachineBasicBlock::const_iterator I = CmpInstr, E = MI,
- B = MI->getParent()->begin();
+ MachineBasicBlock::iterator I = CmpInstr,E = MI, B = MI->getParent()->begin();
// Early exit if CmpInstr is at the beginning of the BB.
if (I == B) return false;
@@ -1957,7 +1984,7 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
bool isKill = UseMI->getOperand(OpIdx).isKill();
unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
- *UseMI, UseMI->getDebugLoc(),
+ UseMI, UseMI->getDebugLoc(),
get(NewUseOpc), NewReg)
.addReg(Reg1, getKillRegState(isKill))
.addImm(SOImmValV1)));
@@ -2332,6 +2359,59 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return UseCycle;
}
+static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
+ const MachineInstr *MI, unsigned Reg,
+ unsigned &DefIdx, unsigned &Dist) {
+ Dist = 0;
+
+ MachineBasicBlock::const_iterator I = MI; ++I;
+ MachineBasicBlock::const_instr_iterator II =
+ llvm::prior(I.getInstrIterator());
+ assert(II->isInsideBundle() && "Empty bundle?");
+
+ int Idx = -1;
+ while (II->isInsideBundle()) {
+ Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
+ if (Idx != -1)
+ break;
+ --II;
+ ++Dist;
+ }
+
+ assert(Idx != -1 && "Cannot find bundled definition!");
+ DefIdx = Idx;
+ return II;
+}
+
+static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
+ const MachineInstr *MI, unsigned Reg,
+ unsigned &UseIdx, unsigned &Dist) {
+ Dist = 0;
+
+ MachineBasicBlock::const_instr_iterator II = MI; ++II;
+ assert(II->isInsideBundle() && "Empty bundle?");
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+
+ // FIXME: This doesn't properly handle multiple uses.
+ int Idx = -1;
+ while (II != E && II->isInsideBundle()) {
+ Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
+ if (Idx != -1)
+ break;
+ if (II->getOpcode() != ARM::t2IT)
+ ++Dist;
+ ++II;
+ }
+
+ if (Idx == -1) {
+ Dist = 0;
+ return 0;
+ }
+
+ UseIdx = Idx;
+ return II;
+}
+
int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI, unsigned DefIdx,
@@ -2340,35 +2420,77 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
DefMI->isRegSequence() || DefMI->isImplicitDef())
return 1;
- const MCInstrDesc &DefMCID = DefMI->getDesc();
if (!ItinData || ItinData->isEmpty())
- return DefMCID.mayLoad() ? 3 : 1;
+ return DefMI->mayLoad() ? 3 : 1;
- const MCInstrDesc &UseMCID = UseMI->getDesc();
+ const MCInstrDesc *DefMCID = &DefMI->getDesc();
+ const MCInstrDesc *UseMCID = &UseMI->getDesc();
const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
- if (DefMO.getReg() == ARM::CPSR) {
+ unsigned Reg = DefMO.getReg();
+ if (Reg == ARM::CPSR) {
if (DefMI->getOpcode() == ARM::FMSTAT) {
// fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
return Subtarget.isCortexA9() ? 1 : 20;
}
// CPSR set and branch can be paired in the same cycle.
- if (UseMCID.isBranch())
+ if (UseMI->isBranch())
return 0;
+
+ // Otherwise it takes the instruction latency (generally one).
+ int Latency = getInstrLatency(ItinData, DefMI);
+
+ // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
+ // its uses. Instructions which are otherwise scheduled between them may
+ // incur a code size penalty (not able to use the CPSR setting 16-bit
+ // instructions).
+ if (Latency > 0 && Subtarget.isThumb2()) {
+ const MachineFunction *MF = DefMI->getParent()->getParent();
+ if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ --Latency;
+ }
+ return Latency;
}
unsigned DefAlign = DefMI->hasOneMemOperand()
? (*DefMI->memoperands_begin())->getAlignment() : 0;
unsigned UseAlign = UseMI->hasOneMemOperand()
? (*UseMI->memoperands_begin())->getAlignment() : 0;
- int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
- UseMCID, UseIdx, UseAlign);
+
+ unsigned DefAdj = 0;
+ if (DefMI->isBundle()) {
+ DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj);
+ if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
+ DefMI->isRegSequence() || DefMI->isImplicitDef())
+ return 1;
+ DefMCID = &DefMI->getDesc();
+ }
+ unsigned UseAdj = 0;
+ if (UseMI->isBundle()) {
+ unsigned NewUseIdx;
+ const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
+ Reg, NewUseIdx, UseAdj);
+ if (NewUseMI) {
+ UseMI = NewUseMI;
+ UseIdx = NewUseIdx;
+ UseMCID = &UseMI->getDesc();
+ }
+ }
+
+ int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign,
+ *UseMCID, UseIdx, UseAlign);
+ int Adj = DefAdj + UseAdj;
+ if (Adj) {
+ Latency -= (int)(DefAdj + UseAdj);
+ if (Latency < 1)
+ return 1;
+ }
if (Latency > 1 &&
(Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
// FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
// variants are one cycle cheaper.
- switch (DefMCID.getOpcode()) {
+ switch (DefMCID->getOpcode()) {
default: break;
case ARM::LDRrs:
case ARM::LDRBrs: {
@@ -2393,7 +2515,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
}
if (DefAlign < 8 && Subtarget.isCortexA9())
- switch (DefMCID.getOpcode()) {
+ switch (DefMCID->getOpcode()) {
default: break;
case ARM::VLD1q8:
case ARM::VLD1q16:
@@ -2413,12 +2535,18 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD2q8:
case ARM::VLD2q16:
case ARM::VLD2q32:
- case ARM::VLD2d8_UPD:
- case ARM::VLD2d16_UPD:
- case ARM::VLD2d32_UPD:
- case ARM::VLD2q8_UPD:
- case ARM::VLD2q16_UPD:
- case ARM::VLD2q32_UPD:
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2q8wb_fixed:
+ case ARM::VLD2q16wb_fixed:
+ case ARM::VLD2q32wb_fixed:
+ case ARM::VLD2d8wb_register:
+ case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_register:
+ case ARM::VLD2q8wb_register:
+ case ARM::VLD2q16wb_register:
+ case ARM::VLD2q32wb_register:
case ARM::VLD3d8:
case ARM::VLD3d16:
case ARM::VLD3d32:
@@ -2446,9 +2574,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD1DUPq8:
case ARM::VLD1DUPq16:
case ARM::VLD1DUPq32:
- case ARM::VLD1DUPq8_UPD:
- case ARM::VLD1DUPq16_UPD:
- case ARM::VLD1DUPq32_UPD:
+ case ARM::VLD1DUPq8wb_fixed:
+ case ARM::VLD1DUPq16wb_fixed:
+ case ARM::VLD1DUPq32wb_fixed:
+ case ARM::VLD1DUPq8wb_register:
+ case ARM::VLD1DUPq16wb_register:
+ case ARM::VLD1DUPq32wb_register:
case ARM::VLD2DUPd8:
case ARM::VLD2DUPd16:
case ARM::VLD2DUPd32:
@@ -2580,12 +2711,18 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD2q8Pseudo:
case ARM::VLD2q16Pseudo:
case ARM::VLD2q32Pseudo:
- case ARM::VLD2d8Pseudo_UPD:
- case ARM::VLD2d16Pseudo_UPD:
- case ARM::VLD2d32Pseudo_UPD:
- case ARM::VLD2q8Pseudo_UPD:
- case ARM::VLD2q16Pseudo_UPD:
- case ARM::VLD2q32Pseudo_UPD:
+ case ARM::VLD2d8PseudoWB_fixed:
+ case ARM::VLD2d16PseudoWB_fixed:
+ case ARM::VLD2d32PseudoWB_fixed:
+ case ARM::VLD2q8PseudoWB_fixed:
+ case ARM::VLD2q16PseudoWB_fixed:
+ case ARM::VLD2q32PseudoWB_fixed:
+ case ARM::VLD2d8PseudoWB_register:
+ case ARM::VLD2d16PseudoWB_register:
+ case ARM::VLD2d32PseudoWB_register:
+ case ARM::VLD2q8PseudoWB_register:
+ case ARM::VLD2q16PseudoWB_register:
+ case ARM::VLD2q32PseudoWB_register:
case ARM::VLD3d8Pseudo:
case ARM::VLD3d16Pseudo:
case ARM::VLD3d32Pseudo:
@@ -2621,9 +2758,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD1DUPq8Pseudo:
case ARM::VLD1DUPq16Pseudo:
case ARM::VLD1DUPq32Pseudo:
- case ARM::VLD1DUPq8Pseudo_UPD:
- case ARM::VLD1DUPq16Pseudo_UPD:
- case ARM::VLD1DUPq32Pseudo_UPD:
+ case ARM::VLD1DUPq8PseudoWB_fixed:
+ case ARM::VLD1DUPq16PseudoWB_fixed:
+ case ARM::VLD1DUPq32PseudoWB_fixed:
+ case ARM::VLD1DUPq8PseudoWB_register:
+ case ARM::VLD1DUPq16PseudoWB_register:
+ case ARM::VLD1DUPq32PseudoWB_register:
case ARM::VLD2DUPd8Pseudo:
case ARM::VLD2DUPd16Pseudo:
case ARM::VLD2DUPd32Pseudo:
@@ -2671,6 +2811,19 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return Latency;
}
+unsigned
+ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *DepMI) const {
+ unsigned Reg = DefMI->getOperand(DefIdx).getReg();
+ if (DepMI->readsRegister(Reg, &getRegisterInfo()) || !isPredicated(DepMI))
+ return 1;
+
+ // If the second MI is predicated, then there is an implicit use dependency.
+ return getOperandLatency(ItinData, DefMI, DefIdx, DepMI,
+ DepMI->getNumOperands());
+}
+
int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const {
@@ -2681,6 +2834,17 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
if (!ItinData || ItinData->isEmpty())
return 1;
+ if (MI->isBundle()) {
+ int Latency = 0;
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ if (I->getOpcode() != ARM::t2IT)
+ Latency += getInstrLatency(ItinData, I, PredCost);
+ }
+ return Latency;
+ }
+
const MCInstrDesc &MCID = MI->getDesc();
unsigned Class = MCID.getSchedClass();
unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 0f9f321..68e8208 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -69,10 +69,7 @@ public:
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
// Predication support.
- bool isPredicated(const MachineInstr *MI) const {
- int PIdx = MI->findFirstPredOperandIdx();
- return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
- }
+ bool isPredicated(const MachineInstr *MI) const;
ARMCC::CondCodes getPredicate(const MachineInstr *MI) const {
int PIdx = MI->findFirstPredOperandIdx();
@@ -213,12 +210,18 @@ public:
SDNode *DefNode, unsigned DefIdx,
SDNode *UseNode, unsigned UseIdx) const;
+ virtual unsigned getOutputLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *DepMI) const;
+
/// VFP/NEON execution domains.
std::pair<uint16_t, uint16_t>
getExecutionDomain(const MachineInstr *MI) const;
void setExecutionDomain(MachineInstr *MI, unsigned Domain) const;
private:
+ unsigned getInstBundleLength(const MachineInstr *MI) const;
+
int getVLDMDefCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &DefMCID,
unsigned DefClass,
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 7c42342..8ee6ce2 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -631,7 +631,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
// 1. Dynamic stack realignment is explicitly disabled,
// 2. This is a Thumb1 function (it's not useful, so we don't bother), or
// 3. There are VLAs in the function and the base pointer is disabled.
- return (RealignStack && !AFI->isThumb1OnlyFunction() &&
+ return (MF.getTarget().Options.RealignStack && !AFI->isThumb1OnlyFunction() &&
(!MFI->hasVarSizedObjects() || EnableBasePointer));
}
@@ -649,7 +649,7 @@ needsStackRealignment(const MachineFunction &MF) const {
bool ARMBaseRegisterInfo::
cannotEliminateFrame(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- if (DisableFramePointerElim(MF) && MFI->adjustsStack())
+ if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->adjustsStack())
return true;
return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()
|| needsStackRealignment(MF);
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index d74ccfa..365f0bb 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -401,7 +401,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB) {
MCE.StartMachineBasicBlock(MBB);
- for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
I != E; ++I)
emitInstruction(*I);
}
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 3e3a413..2039d41 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -26,6 +26,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -51,6 +52,43 @@ static cl::opt<bool>
AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
cl::desc("Adjust basic block layout to better use TB[BH]"));
+static cl::opt<bool>
+AlignConstantIslands("arm-align-constant-islands", cl::Hidden, cl::init(true),
+ cl::desc("Align constant islands in code"));
+
+/// UnknownPadding - Return the worst case padding that could result from
+/// unknown offset bits. This does not include alignment padding caused by
+/// known offset bits.
+///
+/// @param LogAlign log2(alignment)
+/// @param KnownBits Number of known low offset bits.
+static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
+ if (KnownBits < LogAlign)
+ return (1u << LogAlign) - (1u << KnownBits);
+ return 0;
+}
+
+/// WorstCaseAlign - Assuming only the low KnownBits bits in Offset are exact,
+/// add padding such that:
+///
+/// 1. The result is aligned to 1 << LogAlign.
+///
+/// 2. No other value of the unknown bits would require more padding.
+///
+/// This may add more padding than is required to satisfy just one of the
+/// constraints. It is necessary to compute alignment this way to guarantee
+/// that we don't underestimate the padding before an aligned block. If the
+/// real padding before a block is larger than we think, constant pool entries
+/// may go out of range.
+static inline unsigned WorstCaseAlign(unsigned Offset, unsigned LogAlign,
+ unsigned KnownBits) {
+ // Add the worst possible padding that the unknown bits could cause.
+ Offset += UnknownPadding(LogAlign, KnownBits);
+
+ // Then align the result.
+ return RoundUpToAlignment(Offset, 1u << LogAlign);
+}
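A standalone numeric sketch of the padding math above; the values are made up and RoundUpToAlignment is re-implemented locally with the same rounding behavior as the LLVM helper:

  #include <cstdio>

  static unsigned RoundUpToAlignment(unsigned Value, unsigned Align) {
    return (Value + Align - 1) / Align * Align;
  }
  static unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
    if (KnownBits < LogAlign)
      return (1u << LogAlign) - (1u << KnownBits);
    return 0;
  }
  static unsigned WorstCaseAlign(unsigned Offset, unsigned LogAlign,
                                 unsigned KnownBits) {
    Offset += UnknownPadding(LogAlign, KnownBits);     // worst-case slack first
    return RoundUpToAlignment(Offset, 1u << LogAlign); // then align the result
  }

  int main() {
    // A block ends at offset 22 with only 1 known low bit; the next block
    // wants 8-byte alignment (LogAlign = 3). UnknownPadding(3, 1) = 8 - 2 = 6,
    // so the worst-case aligned offset is RoundUp(22 + 6, 8) = 32.
    std::printf("%u\n", WorstCaseAlign(22, 3, 1)); // prints 32
    return 0;
  }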
+
namespace {
/// ARMConstantIslands - Due to limited PC-relative displacements, ARM
/// requires constant pool entries to be scattered among the instructions
@@ -64,16 +102,70 @@ namespace {
/// CPE - A constant pool entry that has been placed somewhere, which
/// tracks a list of users.
class ARMConstantIslands : public MachineFunctionPass {
- /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed
- /// by MBB Number. The two-byte pads required for Thumb alignment are
- /// counted as part of the following block (i.e., the offset and size for
- /// a padded block will both be ==2 mod 4).
- std::vector<unsigned> BBSizes;
+ /// BasicBlockInfo - Information about the offset and size of a single
+ /// basic block.
+ struct BasicBlockInfo {
+ /// Offset - Distance from the beginning of the function to the beginning
+ /// of this basic block.
+ ///
+ /// The offset is always aligned as required by the basic block.
+ unsigned Offset;
+
+ /// Size - Size of the basic block in bytes. If the block contains
+ /// inline assembly, this is a worst case estimate.
+ ///
+ /// The size does not include any alignment padding whether from the
+ /// beginning of the block, or from an aligned jump table at the end.
+ unsigned Size;
+
+ /// KnownBits - The number of low bits in Offset that are known to be
+ /// exact. The remaining bits of Offset are an upper bound.
+ uint8_t KnownBits;
+
+ /// Unalign - When non-zero, the block contains instructions (inline asm)
+ /// of unknown size. The real size may be smaller than Size bytes by a
+ /// multiple of 1 << Unalign.
+ uint8_t Unalign;
+
+ /// PostAlign - When non-zero, the block terminator contains a .align
+ /// directive, so the end of the block is aligned to 1 << PostAlign
+ /// bytes.
+ uint8_t PostAlign;
+
+ BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0),
+ PostAlign(0) {}
+
+ /// Compute the number of known offset bits internally to this block.
+ /// This number should be used to predict worst case padding when
+ /// splitting the block.
+ unsigned internalKnownBits() const {
+ return Unalign ? Unalign : KnownBits;
+ }
+
+ /// Compute the offset immediately following this block. If LogAlign is
+ /// specified, return the offset the successor block will get if it has
+ /// this alignment.
+ unsigned postOffset(unsigned LogAlign = 0) const {
+ unsigned PO = Offset + Size;
+ unsigned LA = std::max(unsigned(PostAlign), LogAlign);
+ if (!LA)
+ return PO;
+ // Add alignment padding from the terminator.
+ return WorstCaseAlign(PO, LA, internalKnownBits());
+ }
+
+ /// Compute the number of known low bits of postOffset. If this block
+ /// contains inline asm, the number of known bits drops to the
+ /// instruction alignment. An aligned terminator may increase the number
+ /// of know bits.
+ /// If LogAlign is given, also consider the alignment of the next block.
+ unsigned postKnownBits(unsigned LogAlign = 0) const {
+ return std::max(std::max(unsigned(PostAlign), LogAlign),
+ internalKnownBits());
+ }
+ };
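A quick worked example of postOffset() with made-up numbers: for a block at Offset 0x100 with Size 0x1C, PostAlign 0, and KnownBits 2, postOffset(2) = RoundUp(0x11C, 4) = 0x11C; with KnownBits 1, WorstCaseAlign first adds UnknownPadding(2, 1) = 2 bytes of possible slack, and the result rounds up to 0x120.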
- /// BBOffsets - the offset of each MBB in bytes, starting from 0.
- /// The two-byte pads required for Thumb alignment are counted as part of
- /// the following block.
- std::vector<unsigned> BBOffsets;
+ std::vector<BasicBlockInfo> BBInfo;
/// WaterList - A sorted list of basic blocks where islands could be placed
/// (i.e. blocks that don't fall through to the following block, due
@@ -162,9 +254,8 @@ namespace {
/// the branch fix up pass.
bool HasFarJump;
- /// HasInlineAsm - True if the function contains inline assembly.
- bool HasInlineAsm;
-
+ MachineFunction *MF;
+ MachineConstantPool *MCP;
const ARMInstrInfo *TII;
const ARMSubtarget *STI;
ARMFunctionInfo *AFI;
@@ -182,67 +273,65 @@ namespace {
}
private:
- void DoInitialPlacement(MachineFunction &MF,
- std::vector<MachineInstr*> &CPEMIs);
+ void DoInitialPlacement(std::vector<MachineInstr*> &CPEMIs);
CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
- void JumpTableFunctionScan(MachineFunction &MF);
- void InitialFunctionScan(MachineFunction &MF,
- const std::vector<MachineInstr*> &CPEMIs);
+ unsigned getCPELogAlign(const MachineInstr *CPEMI);
+ void JumpTableFunctionScan();
+ void InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs);
MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI);
void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB);
- void AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta);
+ void AdjustBBOffsetsAfter(MachineBasicBlock *BB);
bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI);
int LookForExistingCPEntry(CPUser& U, unsigned UserOffset);
bool LookForWater(CPUser&U, unsigned UserOffset, water_iterator &WaterIter);
void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset,
MachineBasicBlock *&NewMBB);
- bool HandleConstantPoolUser(MachineFunction &MF, unsigned CPUserIndex);
+ bool HandleConstantPoolUser(unsigned CPUserIndex);
void RemoveDeadCPEMI(MachineInstr *CPEMI);
bool RemoveUnusedCPEntries();
bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
MachineInstr *CPEMI, unsigned Disp, bool NegOk,
bool DoDump = false);
bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water,
- CPUser &U);
- bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
- unsigned Disp, bool NegativeOK, bool IsSoImm = false);
+ CPUser &U, unsigned &Growth);
bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
- bool FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br);
- bool FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br);
- bool FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br);
+ bool FixUpImmediateBr(ImmBranch &Br);
+ bool FixUpConditionalBr(ImmBranch &Br);
+ bool FixUpUnconditionalBr(ImmBranch &Br);
bool UndoLRSpillRestore();
- bool OptimizeThumb2Instructions(MachineFunction &MF);
- bool OptimizeThumb2Branches(MachineFunction &MF);
- bool ReorderThumb2JumpTables(MachineFunction &MF);
- bool OptimizeThumb2JumpTables(MachineFunction &MF);
+ bool OptimizeThumb2Instructions();
+ bool OptimizeThumb2Branches();
+ bool ReorderThumb2JumpTables();
+ bool OptimizeThumb2JumpTables();
MachineBasicBlock *AdjustJTTargetBlockForward(MachineBasicBlock *BB,
MachineBasicBlock *JTBB);
+ void ComputeBlockSize(MachineBasicBlock *MBB);
unsigned GetOffsetOf(MachineInstr *MI) const;
void dumpBBs();
- void verify(MachineFunction &MF);
+ void verify();
+
+ bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
+ unsigned Disp, bool NegativeOK, bool IsSoImm = false);
+ bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
+ const CPUser &U) {
+ return OffsetIsInRange(UserOffset, TrialOffset,
+ U.MaxDisp, U.NegOk, U.IsSoImm);
+ }
};
char ARMConstantIslands::ID = 0;
}
/// verify - check BBOffsets, BBSizes, alignment of islands
-void ARMConstantIslands::verify(MachineFunction &MF) {
- assert(BBOffsets.size() == BBSizes.size());
- for (unsigned i = 1, e = BBOffsets.size(); i != e; ++i)
- assert(BBOffsets[i-1]+BBSizes[i-1] == BBOffsets[i]);
- if (!isThumb)
- return;
+void ARMConstantIslands::verify() {
#ifndef NDEBUG
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock *MBB = MBBI;
- if (!MBB->empty() &&
- MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
- unsigned MBBId = MBB->getNumber();
- assert(HasInlineAsm ||
- (BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) ||
- (BBOffsets[MBBId]%4 != 0 && BBSizes[MBBId]%4 != 0));
- }
+ unsigned Align = MBB->getAlignment();
+ unsigned MBBId = MBB->getNumber();
+ assert(BBInfo[MBBId].Offset % (1u << Align) == 0);
+ assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
}
for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
CPUser &U = CPUsers[i];
@@ -257,10 +346,16 @@ void ARMConstantIslands::verify(MachineFunction &MF) {
/// print block size and offset information - debugging
void ARMConstantIslands::dumpBBs() {
- for (unsigned J = 0, E = BBOffsets.size(); J !=E; ++J) {
- DEBUG(errs() << "block " << J << " offset " << BBOffsets[J]
- << " size " << BBSizes[J] << "\n");
- }
+ DEBUG({
+ for (unsigned J = 0, E = BBInfo.size(); J != E; ++J) {
+ const BasicBlockInfo &BBI = BBInfo[J];
+ dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
+ << " kb=" << unsigned(BBI.KnownBits)
+ << " ua=" << unsigned(BBI.Unalign)
+ << " pa=" << unsigned(BBI.PostAlign)
+ << format(" size=%#x\n", BBInfo[J].Size);
+ }
+ });
}
/// createARMConstantIslandPass - returns an instance of the constpool
@@ -269,34 +364,38 @@ FunctionPass *llvm::createARMConstantIslandPass() {
return new ARMConstantIslands();
}
-bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
- MachineConstantPool &MCP = *MF.getConstantPool();
+bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ MCP = mf.getConstantPool();
- TII = (const ARMInstrInfo*)MF.getTarget().getInstrInfo();
- AFI = MF.getInfo<ARMFunctionInfo>();
- STI = &MF.getTarget().getSubtarget<ARMSubtarget>();
+ DEBUG(dbgs() << "***** ARMConstantIslands: "
+ << MCP->getConstants().size() << " CP entries, aligned to "
+ << MCP->getConstantPoolAlignment() << " bytes *****\n");
+
+ TII = (const ARMInstrInfo*)MF->getTarget().getInstrInfo();
+ AFI = MF->getInfo<ARMFunctionInfo>();
+ STI = &MF->getTarget().getSubtarget<ARMSubtarget>();
isThumb = AFI->isThumbFunction();
isThumb1 = AFI->isThumb1OnlyFunction();
isThumb2 = AFI->isThumb2Function();
HasFarJump = false;
- HasInlineAsm = false;
// Renumber all of the machine basic blocks in the function, guaranteeing that
// the numbers agree with the position of the block in the function.
- MF.RenumberBlocks();
+ MF->RenumberBlocks();
// Try to reorder and otherwise adjust the block layout to make good use
// of the TB[BH] instructions.
bool MadeChange = false;
if (isThumb2 && AdjustJumpTableBlocks) {
- JumpTableFunctionScan(MF);
- MadeChange |= ReorderThumb2JumpTables(MF);
+ JumpTableFunctionScan();
+ MadeChange |= ReorderThumb2JumpTables();
// Data is out of date, so clear it. It'll be re-computed later.
T2JumpTables.clear();
// Blocks may have shifted around. Keep the numbering up to date.
- MF.RenumberBlocks();
+ MF->RenumberBlocks();
}
// Thumb1 functions containing constant pools get 4-byte alignment.
@@ -304,16 +403,13 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// ARM and Thumb2 functions need to be 4-byte aligned.
if (!isThumb1)
- MF.EnsureAlignment(2); // 2 = log2(4)
+ MF->EnsureAlignment(2); // 2 = log2(4)
// Perform the initial placement of the constant pool entries. To start with,
// we put them all at the end of the function.
std::vector<MachineInstr*> CPEMIs;
- if (!MCP.isEmpty()) {
- DoInitialPlacement(MF, CPEMIs);
- if (isThumb1)
- MF.EnsureAlignment(2); // 2 = log2(4)
- }
+ if (!MCP->isEmpty())
+ DoInitialPlacement(CPEMIs);
/// The next UID to take is the first unused one.
AFI->initPICLabelUId(CPEMIs.size());
@@ -321,7 +417,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// Do the initial scan of the function, building up information about the
// sizes of each block, the location of all the water, and finding all of the
// constant pool users.
- InitialFunctionScan(MF, CPEMIs);
+ InitialFunctionScan(CPEMIs);
CPEMIs.clear();
DEBUG(dumpBBs());
@@ -333,9 +429,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// is no change.
unsigned NoCPIters = 0, NoBRIters = 0;
while (true) {
+ DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n');
bool CPChange = false;
for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
- CPChange |= HandleConstantPoolUser(MF, i);
+ CPChange |= HandleConstantPoolUser(i);
if (CPChange && ++NoCPIters > 30)
llvm_unreachable("Constant Island pass failed to converge!");
DEBUG(dumpBBs());
@@ -344,9 +441,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// appear as "new water" for the next iteration of constant pool placement.
NewWaterList.clear();
+ DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n');
bool BRChange = false;
for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
- BRChange |= FixUpImmediateBr(MF, ImmBranches[i]);
+ BRChange |= FixUpImmediateBr(ImmBranches[i]);
if (BRChange && ++NoBRIters > 30)
llvm_unreachable("Branch Fix Up pass failed to converge!");
DEBUG(dumpBBs());
@@ -358,10 +456,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// Shrink 32-bit Thumb2 branch, load, and store instructions.
if (isThumb2 && !STI->prefers32BitThumb())
- MadeChange |= OptimizeThumb2Instructions(MF);
+ MadeChange |= OptimizeThumb2Instructions();
// After a while, this might be made debug-only, but it is not expensive.
- verify(MF);
+ verify();
// If LR has been forced spilled and no far jump (i.e. BL) has been issued,
// undo the spill / restore of LR if possible.
@@ -376,10 +474,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
}
}
- DEBUG(errs() << '\n'; dumpBBs());
+ DEBUG(dbgs() << '\n'; dumpBBs());
- BBSizes.clear();
- BBOffsets.clear();
+ BBInfo.clear();
WaterList.clear();
CPUsers.clear();
CPEntries.clear();
@@ -392,37 +489,65 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
/// DoInitialPlacement - Perform the initial placement of the constant pool
/// entries. To start with, we put them all at the end of the function.
-void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF,
- std::vector<MachineInstr*> &CPEMIs) {
+void
+ARMConstantIslands::DoInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
// Create the basic block to hold the CPE's.
- MachineBasicBlock *BB = MF.CreateMachineBasicBlock();
- MF.push_back(BB);
+ MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
+ MF->push_back(BB);
+
+ // MachineConstantPool measures alignment in bytes. We measure in log2(bytes).
+ unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment());
+
+ // Mark the basic block as required by the const-pool.
+ // If AlignConstantIslands isn't set, use 4-byte alignment for everything.
+ BB->setAlignment(AlignConstantIslands ? MaxAlign : 2);
+
+ // The function needs to be as aligned as the basic blocks. The linker may
+ // move functions around based on their alignment.
+ MF->EnsureAlignment(BB->getAlignment());
+
+ // Order the entries in BB by descending alignment. That ensures correct
+ // alignment of all entries as long as BB is sufficiently aligned. Keep
+ // track of the insertion point for each alignment. We are going to bucket
+ // sort the entries as they are created.
+ SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxAlign + 1, BB->end());
// Add all of the constants from the constant pool to the end block, use an
// identity mapping of CPI's to CPE's.
- const std::vector<MachineConstantPoolEntry> &CPs =
- MF.getConstantPool()->getConstants();
+ const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
- const TargetData &TD = *MF.getTarget().getTargetData();
+ const TargetData &TD = *MF->getTarget().getTargetData();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
- // Verify that all constant pool entries are a multiple of 4 bytes. If not,
- // we would have to pad them out or something so that instructions stay
- // aligned.
- assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!");
+ assert(Size >= 4 && "Too small constant pool entry");
+ unsigned Align = CPs[i].getAlignment();
+ assert(isPowerOf2_32(Align) && "Invalid alignment");
+ // Verify that all constant pool entries are a multiple of their alignment.
+ // If not, we would have to pad them out so that instructions stay aligned.
+ assert((Size % Align) == 0 && "CP Entry not multiple of alignment!");
+
+ // Insert CONSTPOOL_ENTRY before entries with a smaller alignment.
+ unsigned LogAlign = Log2_32(Align);
+ MachineBasicBlock::iterator InsAt = InsPoint[LogAlign];
MachineInstr *CPEMI =
- BuildMI(BB, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
+ BuildMI(*BB, InsAt, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
.addImm(i).addConstantPoolIndex(i).addImm(Size);
CPEMIs.push_back(CPEMI);
+ // Ensure that future entries with higher alignment get inserted before
+ // CPEMI. This is bucket sort with iterators.
+ for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a)
+ if (InsPoint[a] == InsAt)
+ InsPoint[a] = CPEMI;
+
// Add a new CPEntry, but no corresponding CPUser yet.
std::vector<CPEntry> CPEs;
CPEs.push_back(CPEntry(CPEMI, i));
CPEntries.push_back(CPEs);
++NumCPEs;
- DEBUG(errs() << "Moved CPI#" << i << " to end of function as #" << i
- << "\n");
+ DEBUG(dbgs() << "Moved CPI#" << i << " to end of function\n");
}
+ DEBUG(BB->dump());
}
/// BBHasFallthrough - Return true if the specified basic block can fallthrough
@@ -458,17 +583,33 @@ ARMConstantIslands::CPEntry
return NULL;
}
+/// getCPELogAlign - Returns the required alignment of the constant pool entry
+/// represented by CPEMI. Alignment is measured in log2(bytes) units.
+unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
+ assert(CPEMI && CPEMI->getOpcode() == ARM::CONSTPOOL_ENTRY);
+
+ // Everything is 4-byte aligned unless AlignConstantIslands is set.
+ if (!AlignConstantIslands)
+ return 2;
+
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
+ unsigned Align = MCP->getConstants()[CPI].getAlignment();
+ assert(isPowerOf2_32(Align) && "Invalid CPE alignment");
+ return Log2_32(Align);
+}
+
/// JumpTableFunctionScan - Do a scan of the function, building up
/// information about the sizes of each block and the locations of all
/// the jump tables.
-void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) {
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+void ARMConstantIslands::JumpTableFunctionScan() {
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock &MBB = *MBBI;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I)
- if (I->getDesc().isBranch() && I->getOpcode() == ARM::t2BR_JT)
+ if (I->isBranch() && I->getOpcode() == ARM::t2BR_JT)
T2JumpTables.push_back(I);
}
}
@@ -476,23 +617,27 @@ void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) {
/// InitialFunctionScan - Do the initial scan of the function, building up
/// information about the sizes of each block, the location of all the water,
/// and finding all of the constant pool users.
-void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
- const std::vector<MachineInstr*> &CPEMIs) {
- // First thing, see if the function has any inline assembly in it. If so,
- // we have to be conservative about alignment assumptions, as we don't
- // know for sure the size of any instructions in the inline assembly.
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
- MBBI != E; ++MBBI) {
- MachineBasicBlock &MBB = *MBBI;
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I)
- if (I->getOpcode() == ARM::INLINEASM)
- HasInlineAsm = true;
- }
+void ARMConstantIslands::
+InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs) {
+ BBInfo.clear();
+ BBInfo.resize(MF->getNumBlockIDs());
+
+ // First thing, compute the size of all basic blocks, and see if the function
+ // has any inline assembly in it. If so, we have to be conservative about
+ // alignment assumptions, as we don't know for sure the size of any
+ // instructions in the inline assembly.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
+ ComputeBlockSize(I);
+
+ // The known bits of the entry block offset are determined by the function
+ // alignment.
+ BBInfo.front().KnownBits = MF->getAlignment();
+
+ // Compute block offsets and known bits.
+ AdjustBBOffsetsAfter(MF->begin());
// Now go back through the instructions and build up our data structures.
- unsigned Offset = 0;
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock &MBB = *MBBI;
@@ -501,16 +646,13 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
if (!BBHasFallthrough(&MBB))
WaterList.push_back(&MBB);
- unsigned MBBSize = 0;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
if (I->isDebugValue())
continue;
- // Add instruction size to MBBSize.
- MBBSize += TII->GetInstSizeInBytes(I);
int Opc = I->getOpcode();
- if (I->getDesc().isBranch()) {
+ if (I->isBranch()) {
bool isCond = false;
unsigned Bits = 0;
unsigned Scale = 1;
@@ -518,18 +660,6 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
switch (Opc) {
default:
continue; // Ignore other JT branches
- case ARM::tBR_JTr:
- // A Thumb1 table jump may involve padding; for the offsets to
- // be right, functions containing these must be 4-byte aligned.
- // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
- // table entries. So this code checks whether offset of tBR_JTr + 2
- // is aligned. That is held in Offset+MBBSize, which already has
- // 2 added in for the size of the mov pc instruction.
- MF.EnsureAlignment(2U);
- if ((Offset+MBBSize)%4 != 0 || HasInlineAsm)
- // FIXME: Add a pseudo ALIGN instruction instead.
- MBBSize += 2; // padding
- continue; // Does not get an entry in ImmBranches
case ARM::t2BR_JT:
T2JumpTables.push_back(I);
continue; // Does not get an entry in ImmBranches
@@ -647,18 +777,30 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
break;
}
}
+ }
+}
- // In thumb mode, if this block is a constpool island, we may need padding
- // so it's aligned on 4 byte boundary.
- if (isThumb &&
- !MBB.empty() &&
- MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY &&
- ((Offset%4) != 0 || HasInlineAsm))
- MBBSize += 2;
-
- BBSizes.push_back(MBBSize);
- BBOffsets.push_back(Offset);
- Offset += MBBSize;
+/// ComputeBlockSize - Compute the size and some alignment information for MBB.
+/// This function updates BBInfo directly.
+void ARMConstantIslands::ComputeBlockSize(MachineBasicBlock *MBB) {
+ BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
+ BBI.Size = 0;
+ BBI.Unalign = 0;
+ BBI.PostAlign = 0;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
+ BBI.Size += TII->GetInstSizeInBytes(I);
+ // For inline asm, GetInstSizeInBytes returns a conservative estimate.
+ // The actual size may be smaller, but still a multiple of the instr size.
+ if (I->isInlineAsm())
+ BBI.Unalign = isThumb ? 1 : 2;
+ }
+
+ // tBR_JTr contains a .align 2 directive.
+ if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) {
+ BBI.PostAlign = 2;
+ MBB->getParent()->EnsureAlignment(2);
}
}
@@ -671,14 +813,7 @@ unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const {
// The offset is composed of two things: the sum of the sizes of all MBB's
// before this instruction's block, and the offset from the start of the block
// it is in.
- unsigned Offset = BBOffsets[MBB->getNumber()];
-
- // If we're looking for a CONSTPOOL_ENTRY in Thumb, see if this block has
- // alignment padding, and compensate if so.
- if (isThumb &&
- MI->getOpcode() == ARM::CONSTPOOL_ENTRY &&
- (Offset%4 != 0 || HasInlineAsm))
- Offset += 2;
+ unsigned Offset = BBInfo[MBB->getNumber()].Offset;
// Sum instructions before MI in MBB.
for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) {
@@ -702,12 +837,9 @@ void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
// Renumber the MBB's to keep them consecutive.
NewBB->getParent()->RenumberBlocks(NewBB);
- // Insert a size into BBSizes to align it properly with the (newly
+ // Insert an entry into BBInfo to align it properly with the (newly
// renumbered) block numbers.
- BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
-
- // Likewise for BBOffsets.
- BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
// Next, update WaterList. Specifically, we need to add NewMBB as having
// available water after it.
@@ -723,13 +855,12 @@ void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
/// account for this change and returns the newly created block.
MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
MachineBasicBlock *OrigBB = MI->getParent();
- MachineFunction &MF = *OrigBB->getParent();
// Create a new MBB for the code after the OrigBB.
MachineBasicBlock *NewBB =
- MF.CreateMachineBasicBlock(OrigBB->getBasicBlock());
+ MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
MachineFunction::iterator MBBI = OrigBB; ++MBBI;
- MF.insert(MBBI, NewBB);
+ MF->insert(MBBI, NewBB);
// Splice the instructions starting with MI over to NewBB.
NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
@@ -747,16 +878,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
++NumSplit;
// Update the CFG. All succs of OrigBB are now succs of NewBB.
- while (!OrigBB->succ_empty()) {
- MachineBasicBlock *Succ = *OrigBB->succ_begin();
- OrigBB->removeSuccessor(Succ);
- NewBB->addSuccessor(Succ);
-
- // This pass should be run after register allocation, so there should be no
- // PHI nodes to update.
- assert((Succ->empty() || !Succ->begin()->isPHI())
- && "PHI nodes should be eliminated by now!");
- }
+ NewBB->transferSuccessors(OrigBB);
// OrigBB branches to NewBB.
OrigBB->addSuccessor(NewBB);
@@ -764,14 +886,11 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
// Update internal data structures to account for the newly inserted MBB.
// This is almost the same as UpdateForInsertedWaterBlock, except that
// the Water goes after OrigBB, not NewBB.
- MF.RenumberBlocks(NewBB);
+ MF->RenumberBlocks(NewBB);
- // Insert a size into BBSizes to align it properly with the (newly
+ // Insert an entry into BBInfo to align it properly with the (newly
// renumbered) block numbers.
- BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
-
- // Likewise for BBOffsets.
- BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
// Next, update WaterList. Specifically, we need to add OrigMBB as having
// available water after it (but not if it's already there, which happens
@@ -787,54 +906,19 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
WaterList.insert(IP, OrigBB);
NewWaterList.insert(OrigBB);
- unsigned OrigBBI = OrigBB->getNumber();
- unsigned NewBBI = NewBB->getNumber();
-
- int delta = isThumb1 ? 2 : 4;
-
// Figure out how large the OrigBB is. As the first half of the original
// block, it cannot contain a tablejump. The size includes
// the new jump we added. (It should be possible to do this without
// recounting everything, but it's very confusing, and this is rarely
// executed.)
- unsigned OrigBBSize = 0;
- for (MachineBasicBlock::iterator I = OrigBB->begin(), E = OrigBB->end();
- I != E; ++I)
- OrigBBSize += TII->GetInstSizeInBytes(I);
- BBSizes[OrigBBI] = OrigBBSize;
-
- // ...and adjust BBOffsets for NewBB accordingly.
- BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI];
+ ComputeBlockSize(OrigBB);
// Figure out how large the NewMBB is. As the second half of the original
// block, it may contain a tablejump.
- unsigned NewBBSize = 0;
- for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
- I != E; ++I)
- NewBBSize += TII->GetInstSizeInBytes(I);
- // Set the size of NewBB in BBSizes. It does not include any padding now.
- BBSizes[NewBBI] = NewBBSize;
-
- MachineInstr* ThumbJTMI = prior(NewBB->end());
- if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
- // We've added another 2-byte instruction before this tablejump, which
- // means we will always need padding if we didn't before, and vice versa.
-
- // The original offset of the jump instruction was:
- unsigned OrigOffset = BBOffsets[OrigBBI] + BBSizes[OrigBBI] - delta;
- if (OrigOffset%4 == 0) {
- // We had padding before and now we don't. No net change in code size.
- delta = 0;
- } else {
- // We didn't have padding before and now we do.
- BBSizes[NewBBI] += 2;
- delta = 4;
- }
- }
+ ComputeBlockSize(NewBB);
// All BBOffsets following these blocks must be modified.
- if (delta)
- AdjustBBOffsetsAfter(NewBB, delta);
+ AdjustBBOffsetsAfter(OrigBB);
return NewBB;
}
@@ -882,19 +966,44 @@ bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset,
/// WaterIsInRange - Returns true if a CPE placed after the specified
/// Water (a basic block) will be in range for the specific MI.
-
+///
+/// Compute how much the function will grow by inserting a CPE after Water.
bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset,
- MachineBasicBlock* Water, CPUser &U) {
- unsigned MaxDisp = U.MaxDisp;
- unsigned CPEOffset = BBOffsets[Water->getNumber()] +
- BBSizes[Water->getNumber()];
-
- // If the CPE is to be inserted before the instruction, that will raise
- // the offset of the instruction.
- if (CPEOffset < UserOffset)
- UserOffset += U.CPEMI->getOperand(2).getImm();
-
- return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, U.NegOk, U.IsSoImm);
+ MachineBasicBlock* Water, CPUser &U,
+ unsigned &Growth) {
+ unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
+ unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
+ unsigned NextBlockOffset, NextBlockAlignment;
+ MachineFunction::const_iterator NextBlock = Water;
+ if (++NextBlock == MF->end()) {
+ NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
+ NextBlockAlignment = 0;
+ } else {
+ NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset;
+ NextBlockAlignment = NextBlock->getAlignment();
+ }
+ unsigned Size = U.CPEMI->getOperand(2).getImm();
+ unsigned CPEEnd = CPEOffset + Size;
+
+ // The CPE may be able to hide in the alignment padding before the next
+ // block. It may also cause more padding to be required if it is more aligned
+  // than the next block.
+ if (CPEEnd > NextBlockOffset) {
+ Growth = CPEEnd - NextBlockOffset;
+ // Compute the padding that would go at the end of the CPE to align the next
+ // block.
+ Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment);
+
+ // If the CPE is to be inserted before the instruction, that will raise
+ // the offset of the instruction. Also account for unknown alignment padding
+ // in blocks between CPE and the user.
+ if (CPEOffset < UserOffset)
+ UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign);
+ } else
+ // CPE fits in existing padding.
+ Growth = 0;
+
+ return OffsetIsInRange(UserOffset, CPEOffset, U);
}
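The growth computation above is easiest to follow with illustrative numbers (unknown-alignment padding ignored). Suppose the water block ends at 0x1fe, the next block starts at 0x200 with 4-byte alignment, and the CPE is 8 bytes, also 4-byte aligned:

    CPEOffset = postOffset(2)                = 0x200   (0x1fe rounded up to 4 bytes)
    CPEEnd    = CPEOffset + Size             = 0x208
    Growth    = CPEEnd - NextBlockOffset     = 0x208 - 0x200 = 8
    Growth   += OffsetToAlignment(0x208, 4)  = 8 + 0 = 8

so placing the entry here pushes everything after the water down by 8 bytes. Had the CPE ended at or before 0x200, it would hide entirely in the existing padding and Growth would be 0.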
/// CPEIsInRange - Returns true if the distance between specific MI and
@@ -903,14 +1012,20 @@ bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
MachineInstr *CPEMI, unsigned MaxDisp,
bool NegOk, bool DoDump) {
unsigned CPEOffset = GetOffsetOf(CPEMI);
- assert((CPEOffset%4 == 0 || HasInlineAsm) && "Misaligned CPE");
+ assert(CPEOffset % 4 == 0 && "Misaligned CPE");
if (DoDump) {
- DEBUG(errs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
- << " max delta=" << MaxDisp
- << " insn address=" << UserOffset
- << " CPE address=" << CPEOffset
- << " offset=" << int(CPEOffset-UserOffset) << "\t" << *MI);
+ DEBUG({
+ unsigned Block = MI->getParent()->getNumber();
+ const BasicBlockInfo &BBI = BBInfo[Block];
+ dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
+ << " max delta=" << MaxDisp
+ << format(" insn address=%#x", UserOffset)
+ << " in BB#" << Block << ": "
+ << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI
+ << format("CPE address=%#x offset=%+d: ", CPEOffset,
+ int(CPEOffset-UserOffset));
+ });
}
return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
@@ -933,55 +1048,17 @@ static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
}
#endif // NDEBUG
-void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
- int delta) {
- MachineFunction::iterator MBBI = BB; MBBI = llvm::next(MBBI);
- for(unsigned i = BB->getNumber()+1, e = BB->getParent()->getNumBlockIDs();
- i < e; ++i) {
- BBOffsets[i] += delta;
- // If some existing blocks have padding, adjust the padding as needed, a
- // bit tricky. delta can be negative so don't use % on that.
- if (!isThumb)
- continue;
- MachineBasicBlock *MBB = MBBI;
- if (!MBB->empty() && !HasInlineAsm) {
- // Constant pool entries require padding.
- if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
- unsigned OldOffset = BBOffsets[i] - delta;
- if ((OldOffset%4) == 0 && (BBOffsets[i]%4) != 0) {
- // add new padding
- BBSizes[i] += 2;
- delta += 2;
- } else if ((OldOffset%4) != 0 && (BBOffsets[i]%4) == 0) {
- // remove existing padding
- BBSizes[i] -= 2;
- delta -= 2;
- }
- }
- // Thumb1 jump tables require padding. They should be at the end;
- // following unconditional branches are removed by AnalyzeBranch.
- // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
- // table entries. So this code checks whether offset of tBR_JTr
- // is aligned; if it is, the offset of the jump table following the
- // instruction will not be aligned, and we need padding.
- MachineInstr *ThumbJTMI = prior(MBB->end());
- if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
- unsigned NewMIOffset = GetOffsetOf(ThumbJTMI);
- unsigned OldMIOffset = NewMIOffset - delta;
- if ((OldMIOffset%4) == 0 && (NewMIOffset%4) != 0) {
- // remove existing padding
- BBSizes[i] -= 2;
- delta -= 2;
- } else if ((OldMIOffset%4) != 0 && (NewMIOffset%4) == 0) {
- // add new padding
- BBSizes[i] += 2;
- delta += 2;
- }
- }
- if (delta==0)
- return;
- }
- MBBI = llvm::next(MBBI);
+void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB) {
+ for(unsigned i = BB->getNumber() + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
+ // Get the offset and known bits at the end of the layout predecessor.
+ // Include the alignment of the current block.
+ unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
+ unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
+ unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
+
+ // This is where block i begins.
+ BBInfo[i].Offset = Offset;
+ BBInfo[i].KnownBits = KnownBits;
}
}
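Offsets are now derived purely from the layout predecessor, so the old incremental padding adjustments disappear. This leans on the postOffset/postKnownBits helpers added earlier in the patch; a sketch of what they compute (the exact definitions may differ):

    // Worst-case padding when only KnownBits low bits of an offset are known
    // to be zero and a 2^LogAlign boundary must be reached.
    static unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
      if (KnownBits < LogAlign)
        return (1u << LogAlign) - (1u << KnownBits);
      return 0;
    }

    // Align Offset pessimistically, assuming only KnownBits are known.
    static unsigned WorstCaseAlign(unsigned Offset, unsigned LogAlign,
                                   unsigned KnownBits) {
      Offset += UnknownPadding(LogAlign, KnownBits);
      return RoundUpToAlignment(Offset, 1u << LogAlign);
    }

    // Offset just past this block, including worst-case padding for an
    // alignment requested by the block itself (PostAlign) or by its
    // successor (LogAlign).
    unsigned BasicBlockInfo::postOffset(unsigned LogAlign) const {
      unsigned PO = Offset + Size;
      unsigned LA = std::max(unsigned(PostAlign), LogAlign);
      return LA ? WorstCaseAlign(PO, LA, internalKnownBits()) : PO;
    }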
@@ -1016,7 +1093,7 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
// Check to see if the CPE is already in-range.
if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, U.NegOk, true)) {
- DEBUG(errs() << "In range\n");
+ DEBUG(dbgs() << "In range\n");
return 1;
}
@@ -1031,7 +1108,7 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
if (CPEs[i].CPEMI == NULL)
continue;
if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, U.NegOk)) {
- DEBUG(errs() << "Replacing CPE#" << CPI << " with CPE#"
+ DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#"
<< CPEs[i].CPI << "\n");
// Point the CPUser node to the replacement
U.CPEMI = CPEs[i].CPEMI;
@@ -1079,10 +1156,9 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
if (WaterList.empty())
return false;
- bool FoundWaterThatWouldPad = false;
- water_iterator IPThatWouldPad;
- for (water_iterator IP = prior(WaterList.end()),
- B = WaterList.begin();; --IP) {
+ unsigned BestGrowth = ~0u;
+ for (water_iterator IP = prior(WaterList.end()), B = WaterList.begin();;
+ --IP) {
MachineBasicBlock* WaterBB = *IP;
// Check if water is in range and is either at a lower address than the
// current "high water mark" or a new water block that was created since
@@ -1092,31 +1168,24 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
// should be relatively uncommon and when it does happen, we want to be
// sure to take advantage of it for all the CPEs near that block, so that
// we don't insert more branches than necessary.
- if (WaterIsInRange(UserOffset, WaterBB, U) &&
+ unsigned Growth;
+ if (WaterIsInRange(UserOffset, WaterBB, U, Growth) &&
(WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
- NewWaterList.count(WaterBB))) {
- unsigned WBBId = WaterBB->getNumber();
- if (isThumb &&
- (BBOffsets[WBBId] + BBSizes[WBBId])%4 != 0) {
- // This is valid Water, but would introduce padding. Remember
- // it in case we don't find any Water that doesn't do this.
- if (!FoundWaterThatWouldPad) {
- FoundWaterThatWouldPad = true;
- IPThatWouldPad = IP;
- }
- } else {
- WaterIter = IP;
+ NewWaterList.count(WaterBB)) && Growth < BestGrowth) {
+ // This is the least amount of required padding seen so far.
+ BestGrowth = Growth;
+ WaterIter = IP;
+ DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber()
+ << " Growth=" << Growth << '\n');
+
+ // Keep looking unless it is perfect.
+ if (BestGrowth == 0)
return true;
- }
}
if (IP == B)
break;
}
- if (FoundWaterThatWouldPad) {
- WaterIter = IPThatWouldPad;
- return true;
- }
- return false;
+ return BestGrowth != ~0u;
}
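With Growth in hand, the loop above simply keeps the in-range water needing the least padding, for example:

    candidate A (earlier in WaterList) : Growth = 4
    candidate B (later, lower address) : Growth = 0   <- chosen; BestGrowth == 0 stops the search

whereas the old code only distinguished "would pad" from "would not pad".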
/// CreateNewWater - No existing WaterList entry will work for
@@ -1132,114 +1201,143 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
CPUser &U = CPUsers[CPUserIndex];
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
+ unsigned CPELogAlign = getCPELogAlign(CPEMI);
MachineBasicBlock *UserMBB = UserMI->getParent();
- unsigned OffsetOfNextBlock = BBOffsets[UserMBB->getNumber()] +
- BBSizes[UserMBB->getNumber()];
- assert(OffsetOfNextBlock== BBOffsets[UserMBB->getNumber()+1]);
+ const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()];
// If the block does not end in an unconditional branch already, and if the
// end of the block is within range, make new water there. (The addition
// below is for the unconditional branch we will be adding: 4 bytes on ARM +
// Thumb2, 2 on Thumb1. Possible Thumb1 alignment padding is allowed for
// inside OffsetIsInRange.
- if (BBHasFallthrough(UserMBB) &&
- OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb1 ? 2: 4),
- U.MaxDisp, U.NegOk, U.IsSoImm)) {
- DEBUG(errs() << "Split at end of block\n");
- if (&UserMBB->back() == UserMI)
- assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!");
- NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
- // Add an unconditional branch from UserMBB to fallthrough block.
- // Record it for branch lengthening; this new branch will not get out of
- // range, but if the preceding conditional branch is out of range, the
- // targets will be exchanged, and the altered branch may be out of
- // range, so the machinery has to know about it.
- int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
- if (!isThumb)
- BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
- else
- BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB)
- .addImm(ARMCC::AL).addReg(0);
- unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
- ImmBranches.push_back(ImmBranch(&UserMBB->back(),
- MaxDisp, false, UncondBr));
- int delta = isThumb1 ? 2 : 4;
- BBSizes[UserMBB->getNumber()] += delta;
- AdjustBBOffsetsAfter(UserMBB, delta);
- } else {
- // What a big block. Find a place within the block to split it.
- // This is a little tricky on Thumb1 since instructions are 2 bytes
- // and constant pool entries are 4 bytes: if instruction I references
- // island CPE, and instruction I+1 references CPE', it will
- // not work well to put CPE as far forward as possible, since then
- // CPE' cannot immediately follow it (that location is 2 bytes
- // farther away from I+1 than CPE was from I) and we'd need to create
- // a new island. So, we make a first guess, then walk through the
- // instructions between the one currently being looked at and the
- // possible insertion point, and make sure any other instructions
- // that reference CPEs will be able to use the same island area;
- // if not, we back up the insertion point.
-
- // The 4 in the following is for the unconditional branch we'll be
- // inserting (allows for long branch on Thumb1). Alignment of the
- // island is handled inside OffsetIsInRange.
- unsigned BaseInsertOffset = UserOffset + U.MaxDisp -4;
- // This could point off the end of the block if we've already got
- // constant pool entries following this block; only the last one is
- // in the water list. Back past any possible branches (allow for a
- // conditional and a maximally long unconditional).
- if (BaseInsertOffset >= BBOffsets[UserMBB->getNumber()+1])
- BaseInsertOffset = BBOffsets[UserMBB->getNumber()+1] -
- (isThumb1 ? 6 : 8);
- unsigned EndInsertOffset = BaseInsertOffset +
- CPEMI->getOperand(2).getImm();
- MachineBasicBlock::iterator MI = UserMI;
- ++MI;
- unsigned CPUIndex = CPUserIndex+1;
- unsigned NumCPUsers = CPUsers.size();
- MachineInstr *LastIT = 0;
- for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
- Offset < BaseInsertOffset;
- Offset += TII->GetInstSizeInBytes(MI),
- MI = llvm::next(MI)) {
- if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
- CPUser &U = CPUsers[CPUIndex];
- if (!OffsetIsInRange(Offset, EndInsertOffset,
- U.MaxDisp, U.NegOk, U.IsSoImm)) {
- BaseInsertOffset -= (isThumb1 ? 2 : 4);
- EndInsertOffset -= (isThumb1 ? 2 : 4);
- }
- // This is overly conservative, as we don't account for CPEMIs
- // being reused within the block, but it doesn't matter much.
- EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm();
- CPUIndex++;
- }
+ if (BBHasFallthrough(UserMBB)) {
+ // Size of branch to insert.
+ unsigned Delta = isThumb1 ? 2 : 4;
+ // End of UserBlock after adding a branch.
+ unsigned UserBlockEnd = UserBBI.postOffset() + Delta;
+ // Compute the offset where the CPE will begin.
+ unsigned CPEOffset = WorstCaseAlign(UserBlockEnd, CPELogAlign,
+ UserBBI.postKnownBits());
+
+ if (OffsetIsInRange(UserOffset, CPEOffset, U)) {
+ DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber()
+ << format(", expected CPE offset %#x\n", CPEOffset));
+ NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
+ // Add an unconditional branch from UserMBB to fallthrough block. Record
+ // it for branch lengthening; this new branch will not get out of range,
+ // but if the preceding conditional branch is out of range, the targets
+ // will be exchanged, and the altered branch may be out of range, so the
+ // machinery has to know about it.
+ int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
+ if (!isThumb)
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
+ else
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB)
+ .addImm(ARMCC::AL).addReg(0);
+ unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
+ ImmBranches.push_back(ImmBranch(&UserMBB->back(),
+ MaxDisp, false, UncondBr));
+ BBInfo[UserMBB->getNumber()].Size += Delta;
+ AdjustBBOffsetsAfter(UserMBB);
+ return;
+ }
+ }
- // Remember the last IT instruction.
- if (MI->getOpcode() == ARM::t2IT)
- LastIT = MI;
+ // What a big block. Find a place within the block to split it. This is a
+ // little tricky on Thumb1 since instructions are 2 bytes and constant pool
+ // entries are 4 bytes: if instruction I references island CPE, and
+ // instruction I+1 references CPE', it will not work well to put CPE as far
+ // forward as possible, since then CPE' cannot immediately follow it (that
+ // location is 2 bytes farther away from I+1 than CPE was from I) and we'd
+ // need to create a new island. So, we make a first guess, then walk through
+ // the instructions between the one currently being looked at and the
+ // possible insertion point, and make sure any other instructions that
+ // reference CPEs will be able to use the same island area; if not, we back
+ // up the insertion point.
+
+ // Try to split the block so it's fully aligned. Compute the latest split
+ // point where we can add a 4-byte branch instruction, and then
+ // WorstCaseAlign to LogAlign.
+ unsigned LogAlign = MF->getAlignment();
+ assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry");
+ unsigned KnownBits = UserBBI.internalKnownBits();
+ unsigned UPad = UnknownPadding(LogAlign, KnownBits);
+ unsigned BaseInsertOffset = UserOffset + U.MaxDisp;
+ DEBUG(dbgs() << format("Split in middle of big block before %#x",
+ BaseInsertOffset));
+
+ // Account for alignment and unknown padding.
+ BaseInsertOffset &= ~((1u << LogAlign) - 1);
+ BaseInsertOffset -= UPad;
+
+ // The 4 in the following is for the unconditional branch we'll be inserting
+ // (allows for long branch on Thumb1). Alignment of the island is handled
+ // inside OffsetIsInRange.
+ BaseInsertOffset -= 4;
+
+ DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset)
+ << " la=" << LogAlign
+ << " kb=" << KnownBits
+ << " up=" << UPad << '\n');
+
+ // This could point off the end of the block if we've already got constant
+ // pool entries following this block; only the last one is in the water list.
+ // Back past any possible branches (allow for a conditional and a maximally
+ // long unconditional).
+ if (BaseInsertOffset >= BBInfo[UserMBB->getNumber()+1].Offset)
+ BaseInsertOffset = BBInfo[UserMBB->getNumber()+1].Offset -
+ (isThumb1 ? 6 : 8);
+ unsigned EndInsertOffset =
+ WorstCaseAlign(BaseInsertOffset + 4, LogAlign, KnownBits) +
+ CPEMI->getOperand(2).getImm();
+ MachineBasicBlock::iterator MI = UserMI;
+ ++MI;
+ unsigned CPUIndex = CPUserIndex+1;
+ unsigned NumCPUsers = CPUsers.size();
+ MachineInstr *LastIT = 0;
+ for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
+ Offset < BaseInsertOffset;
+ Offset += TII->GetInstSizeInBytes(MI),
+ MI = llvm::next(MI)) {
+ if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
+ CPUser &U = CPUsers[CPUIndex];
+ if (!OffsetIsInRange(Offset, EndInsertOffset, U)) {
+        // Shift insertion point by one unit of alignment so it is within reach.
+ BaseInsertOffset -= 1u << LogAlign;
+ EndInsertOffset -= 1u << LogAlign;
+ }
+ // This is overly conservative, as we don't account for CPEMIs being
+ // reused within the block, but it doesn't matter much. Also assume CPEs
+ // are added in order with alignment padding. We may eventually be able
+ // to pack the aligned CPEs better.
+ EndInsertOffset = RoundUpToAlignment(EndInsertOffset,
+ 1u << getCPELogAlign(U.CPEMI)) +
+ U.CPEMI->getOperand(2).getImm();
+ CPUIndex++;
}
- DEBUG(errs() << "Split in middle of big block\n");
- --MI;
+ // Remember the last IT instruction.
+ if (MI->getOpcode() == ARM::t2IT)
+ LastIT = MI;
+ }
- // Avoid splitting an IT block.
- if (LastIT) {
- unsigned PredReg = 0;
- ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
- if (CC != ARMCC::AL)
- MI = LastIT;
- }
- NewMBB = SplitBlockBeforeInstr(MI);
+ --MI;
+
+ // Avoid splitting an IT block.
+ if (LastIT) {
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
+ if (CC != ARMCC::AL)
+ MI = LastIT;
}
+ NewMBB = SplitBlockBeforeInstr(MI);
}
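The split-point arithmetic in CreateNewWater is clearer with concrete, purely illustrative numbers. Say the function alignment is LogAlign = 2, the user's block contains inline asm so KnownBits = 1, UserOffset = 0x100 and U.MaxDisp = 0x3a6:

    UPad              = UnknownPadding(2, 1)  = (1 << 2) - (1 << 1) = 2
    BaseInsertOffset  = 0x100 + 0x3a6         = 0x4a6
    BaseInsertOffset &= ~3                    -> 0x4a4   (drop to a 4-byte boundary)
    BaseInsertOffset -= UPad                  -> 0x4a2   (allow for padding we cannot see)
    BaseInsertOffset -= 4                     -> 0x49e   (room for the new unconditional branch)

If that still lies beyond the start of the next block, it is pulled back past any trailing branches (6 or 8 bytes), and the scan over intervening CPUsers may move it back further, one 1 << LogAlign unit at a time.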
/// HandleConstantPoolUser - Analyze the specified user, checking to see if it
/// is out-of-range. If so, pick up the constant pool value and move it some
/// place in-range. Return true if we changed any addresses (thus must run
/// another pass of branch lengthening), false otherwise.
-bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
- unsigned CPUserIndex) {
+bool ARMConstantIslands::HandleConstantPoolUser(unsigned CPUserIndex) {
CPUser &U = CPUsers[CPUserIndex];
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
@@ -1260,11 +1358,11 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
unsigned ID = AFI->createPICLabelUId();
// Look for water where we can place this CPE.
- MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock();
MachineBasicBlock *NewMBB;
water_iterator IP;
if (LookForWater(U, UserOffset, IP)) {
- DEBUG(errs() << "found water in range\n");
+ DEBUG(dbgs() << "Found water in range\n");
MachineBasicBlock *WaterBB = *IP;
// If the original WaterList entry was "new water" on this iteration,
@@ -1279,7 +1377,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
} else {
// No water found.
- DEBUG(errs() << "No water found\n");
+ DEBUG(dbgs() << "No water found\n");
CreateNewWater(CPUserIndex, UserOffset, NewMBB);
// SplitBlockBeforeInstr adds to WaterList, which is important when it is
@@ -1304,7 +1402,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
WaterList.erase(IP);
// Okay, we know we can put an island before NewMBB now, do it!
- MF.insert(NewMBB, NewIsland);
+ MF->insert(NewMBB, NewIsland);
// Update internal data structures to account for the newly inserted MBB.
UpdateForInsertedWaterBlock(NewIsland);
@@ -1320,13 +1418,12 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
++NumCPEs;
- BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()];
- // Compensate for .align 2 in thumb mode.
- if (isThumb && (BBOffsets[NewIsland->getNumber()]%4 != 0 || HasInlineAsm))
- Size += 2;
+ // Mark the basic block as aligned as required by the const-pool entry.
+ NewIsland->setAlignment(getCPELogAlign(U.CPEMI));
+
// Increase the size of the island block to account for the new entry.
- BBSizes[NewIsland->getNumber()] += Size;
- AdjustBBOffsetsAfter(NewIsland, Size);
+ BBInfo[NewIsland->getNumber()].Size += Size;
+ AdjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland)));
// Finally, change the CPI in the instruction operand to be ID.
for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
@@ -1335,8 +1432,8 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
break;
}
- DEBUG(errs() << " Moved CPE to #" << ID << " CPI=" << CPI
- << '\t' << *UserMI);
+ DEBUG(dbgs() << " Moved CPE to #" << ID << " CPI=" << CPI
+ << format(" offset=%#x\n", BBInfo[NewIsland->getNumber()].Offset));
return true;
}
@@ -1347,19 +1444,18 @@ void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) {
MachineBasicBlock *CPEBB = CPEMI->getParent();
unsigned Size = CPEMI->getOperand(2).getImm();
CPEMI->eraseFromParent();
- BBSizes[CPEBB->getNumber()] -= Size;
+ BBInfo[CPEBB->getNumber()].Size -= Size;
// All succeeding offsets have the current size value added in, fix this.
if (CPEBB->empty()) {
- // In thumb1 mode, the size of island may be padded by two to compensate for
- // the alignment requirement. Then it will now be 2 when the block is
- // empty, so fix this.
- // All succeeding offsets have the current size value added in, fix this.
- if (BBSizes[CPEBB->getNumber()] != 0) {
- Size += BBSizes[CPEBB->getNumber()];
- BBSizes[CPEBB->getNumber()] = 0;
- }
- }
- AdjustBBOffsetsAfter(CPEBB, -Size);
+ BBInfo[CPEBB->getNumber()].Size = 0;
+
+ // This block no longer needs to be aligned. <rdar://problem/10534709>.
+ CPEBB->setAlignment(0);
+ } else
+ // Entries are sorted by descending alignment, so realign from the front.
+ CPEBB->setAlignment(getCPELogAlign(CPEBB->begin()));
+
+ AdjustBBOffsetsAfter(CPEBB);
// An island has only one predecessor BB and one successor BB. Check if
// this BB's predecessor jumps directly to this BB's successor. This
// shouldn't happen currently.
@@ -1390,9 +1486,9 @@ bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
unsigned MaxDisp) {
unsigned PCAdj = isThumb ? 4 : 8;
unsigned BrOffset = GetOffsetOf(MI) + PCAdj;
- unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+ unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
- DEBUG(errs() << "Branch of destination BB#" << DestBB->getNumber()
+ DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
<< " from BB#" << MI->getParent()->getNumber()
<< " max delta=" << MaxDisp
<< " from " << GetOffsetOf(MI) << " to " << DestOffset
@@ -1411,7 +1507,7 @@ bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
/// FixUpImmediateBr - Fix up an immediate branch whose destination is too far
/// away to fit in its displacement field.
-bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) {
+bool ARMConstantIslands::FixUpImmediateBr(ImmBranch &Br) {
MachineInstr *MI = Br.MI;
MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
@@ -1420,8 +1516,8 @@ bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) {
return false;
if (!Br.isCond)
- return FixUpUnconditionalBr(MF, Br);
- return FixUpConditionalBr(MF, Br);
+ return FixUpUnconditionalBr(Br);
+ return FixUpConditionalBr(Br);
}
/// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is
@@ -1429,7 +1525,7 @@ bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) {
/// spilled in the epilogue, then we can use BL to implement a far jump.
/// Otherwise, add an intermediate branch instruction to a branch.
bool
-ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) {
+ARMConstantIslands::FixUpUnconditionalBr(ImmBranch &Br) {
MachineInstr *MI = Br.MI;
MachineBasicBlock *MBB = MI->getParent();
if (!isThumb1)
@@ -1438,12 +1534,12 @@ ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) {
// Use BL to implement far jump.
Br.MaxDisp = (1 << 21) * 2;
MI->setDesc(TII->get(ARM::tBfar));
- BBSizes[MBB->getNumber()] += 2;
- AdjustBBOffsetsAfter(MBB, 2);
+ BBInfo[MBB->getNumber()].Size += 2;
+ AdjustBBOffsetsAfter(MBB);
HasFarJump = true;
++NumUBrFixed;
- DEBUG(errs() << " Changed B to long jump " << *MI);
+ DEBUG(dbgs() << " Changed B to long jump " << *MI);
return true;
}
@@ -1452,7 +1548,7 @@ ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) {
/// far away to fit in its displacement field. It is converted to an inverse
/// conditional branch + an unconditional branch to the destination.
bool
-ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
+ARMConstantIslands::FixUpConditionalBr(ImmBranch &Br) {
MachineInstr *MI = Br.MI;
MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
@@ -1487,7 +1583,7 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
// b L1
MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
if (BBIsInRange(MI, NewDest, Br.MaxDisp)) {
- DEBUG(errs() << " Invert Bcc condition and swap its destination with "
+ DEBUG(dbgs() << " Invert Bcc condition and swap its destination with "
<< *BMI);
BMI->getOperand(0).setMBB(DestBB);
MI->getOperand(0).setMBB(NewDest);
@@ -1502,15 +1598,13 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
// No need for the branch to the next block. We're adding an unconditional
// branch to the destination.
int delta = TII->GetInstSizeInBytes(&MBB->back());
- BBSizes[MBB->getNumber()] -= delta;
- MachineBasicBlock* SplitBB = llvm::next(MachineFunction::iterator(MBB));
- AdjustBBOffsetsAfter(SplitBB, -delta);
+ BBInfo[MBB->getNumber()].Size -= delta;
MBB->back().eraseFromParent();
- // BBOffsets[SplitBB] is wrong temporarily, fixed below
+ // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
}
MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
- DEBUG(errs() << " Insert B to BB#" << DestBB->getNumber()
+ DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber()
<< " also invert condition and change dest. to BB#"
<< NextBB->getNumber() << "\n");
@@ -1519,23 +1613,20 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))
.addMBB(NextBB).addImm(CC).addReg(CCReg);
Br.MI = &MBB->back();
- BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+ BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
if (isThumb)
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB)
.addImm(ARMCC::AL).addReg(0);
else
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
- BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+ BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
// Remove the old conditional branch. It may or may not still be in MBB.
- BBSizes[MI->getParent()->getNumber()] -= TII->GetInstSizeInBytes(MI);
+ BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI);
MI->eraseFromParent();
-
- // The net size change is an addition of one unconditional branch.
- int delta = TII->GetInstSizeInBytes(&MBB->back());
- AdjustBBOffsetsAfter(MBB, delta);
+ AdjustBBOffsetsAfter(MBB);
return true;
}
@@ -1561,7 +1652,7 @@ bool ARMConstantIslands::UndoLRSpillRestore() {
return MadeChange;
}
-bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) {
+bool ARMConstantIslands::OptimizeThumb2Instructions() {
bool MadeChange = false;
// Shrink ADR and LDR from constantpool.
@@ -1598,19 +1689,19 @@ bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) {
if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) {
U.MI->setDesc(TII->get(NewOpc));
MachineBasicBlock *MBB = U.MI->getParent();
- BBSizes[MBB->getNumber()] -= 2;
- AdjustBBOffsetsAfter(MBB, -2);
+ BBInfo[MBB->getNumber()].Size -= 2;
+ AdjustBBOffsetsAfter(MBB);
++NumT2CPShrunk;
MadeChange = true;
}
}
- MadeChange |= OptimizeThumb2Branches(MF);
- MadeChange |= OptimizeThumb2JumpTables(MF);
+ MadeChange |= OptimizeThumb2Branches();
+ MadeChange |= OptimizeThumb2JumpTables();
return MadeChange;
}
-bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
+bool ARMConstantIslands::OptimizeThumb2Branches() {
bool MadeChange = false;
for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) {
@@ -1639,8 +1730,8 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
if (BBIsInRange(Br.MI, DestBB, MaxOffs)) {
Br.MI->setDesc(TII->get(NewOpc));
MachineBasicBlock *MBB = Br.MI->getParent();
- BBSizes[MBB->getNumber()] -= 2;
- AdjustBBOffsetsAfter(MBB, -2);
+ BBInfo[MBB->getNumber()].Size -= 2;
+ AdjustBBOffsetsAfter(MBB);
++NumT2BrShrunk;
MadeChange = true;
}
@@ -1663,7 +1754,7 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
// Check if the distance is within 126. Subtract starting offset by 2
// because the cmp will be eliminated.
unsigned BrOffset = GetOffsetOf(Br.MI) + 4 - 2;
- unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+ unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) {
MachineBasicBlock::iterator CmpMI = Br.MI;
if (CmpMI != Br.MI->getParent()->begin()) {
@@ -1681,8 +1772,8 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
CmpMI->eraseFromParent();
Br.MI->eraseFromParent();
Br.MI = NewBR;
- BBSizes[MBB->getNumber()] -= 2;
- AdjustBBOffsetsAfter(MBB, -2);
+ BBInfo[MBB->getNumber()].Size -= 2;
+ AdjustBBOffsetsAfter(MBB);
++NumCBZ;
MadeChange = true;
}
@@ -1696,12 +1787,12 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
/// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
/// jumptables when it's possible.
-bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
+bool ARMConstantIslands::OptimizeThumb2JumpTables() {
bool MadeChange = false;
// FIXME: After the tables are shrunk, can we get rid some of the
// constantpool tables?
- MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
if (MJTI == 0) return false;
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
@@ -1709,7 +1800,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
MachineInstr *MI = T2JumpTables[i];
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
- unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2);
+ unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2);
MachineOperand JTOP = MI->getOperand(JTOpIdx);
unsigned JTI = JTOP.getIndex();
assert(JTI < JT.size());
@@ -1720,7 +1811,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
MachineBasicBlock *MBB = JTBBs[j];
- unsigned DstOffset = BBOffsets[MBB->getNumber()];
+ unsigned DstOffset = BBInfo[MBB->getNumber()].Offset;
// Negative offset is not ok. FIXME: We should change BB layout to make
// sure all the branches are forward.
if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2)
@@ -1808,8 +1899,8 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
MI->eraseFromParent();
int delta = OrigSize - NewSize;
- BBSizes[MBB->getNumber()] -= delta;
- AdjustBBOffsetsAfter(MBB, -delta);
+ BBInfo[MBB->getNumber()].Size -= delta;
+ AdjustBBOffsetsAfter(MBB);
++NumTBs;
MadeChange = true;
@@ -1821,10 +1912,10 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
/// ReorderThumb2JumpTables - Adjust the function's block layout to ensure that
/// jump tables always branch forwards, since that's what tbb and tbh need.
-bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
+bool ARMConstantIslands::ReorderThumb2JumpTables() {
bool MadeChange = false;
- MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
if (MJTI == 0) return false;
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
@@ -1832,7 +1923,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
MachineInstr *MI = T2JumpTables[i];
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
- unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2);
+ unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2);
MachineOperand JTOP = MI->getOperand(JTOpIdx);
unsigned JTI = JTOP.getIndex();
assert(JTI < JT.size());
@@ -1864,8 +1955,6 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
MachineBasicBlock *ARMConstantIslands::
AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
{
- MachineFunction &MF = *BB->getParent();
-
// If the destination block is terminated by an unconditional branch,
// try to move it; otherwise, create a new block following the jump
// table that branches back to the actual target. This is a very simple
@@ -1882,22 +1971,22 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
// If the block ends in an unconditional branch, move it. The prior block
// has to have an analyzable terminator for us to move this one. Be paranoid
// and make sure we're not trying to move the entry block of the function.
- if (!B && Cond.empty() && BB != MF.begin() &&
+ if (!B && Cond.empty() && BB != MF->begin() &&
!TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) {
BB->moveAfter(JTBB);
OldPrior->updateTerminator();
BB->updateTerminator();
// Update numbering to account for the block being moved.
- MF.RenumberBlocks();
+ MF->RenumberBlocks();
++NumJTMoved;
return NULL;
}
// Create a new MBB for the code after the jump BB.
MachineBasicBlock *NewBB =
- MF.CreateMachineBasicBlock(JTBB->getBasicBlock());
+ MF->CreateMachineBasicBlock(JTBB->getBasicBlock());
MachineFunction::iterator MBBI = JTBB; ++MBBI;
- MF.insert(MBBI, NewBB);
+ MF->insert(MBBI, NewBB);
// Add an unconditional branch from NewBB to BB.
// There doesn't seem to be meaningful DebugInfo available; this doesn't
@@ -1907,7 +1996,7 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
.addImm(ARMCC::AL).addReg(0);
// Update internal data structures to account for the newly inserted MBB.
- MF.RenumberBlocks(NewBB);
+ MF->RenumberBlocks(NewBB);
// Update the CFG.
NewBB->addSuccessor(BB);
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index fc464ea..01d772d 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -61,7 +61,7 @@ namespace {
void ExpandVST(MachineBasicBlock::iterator &MBBI);
void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
- unsigned Opc, bool IsExt, unsigned NumRegs);
+ unsigned Opc, bool IsExt);
void ExpandMOV32BitImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
};
@@ -129,12 +129,15 @@ namespace {
}
static const NEONLdStTableEntry NEONLdStTable[] = {
-{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, false, SingleSpc, 2, 4,true},
-{ ARM::VLD1DUPq16Pseudo_UPD, ARM::VLD1DUPq16_UPD, true, true, true, SingleSpc, 2, 4,true},
-{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, false, SingleSpc, 2, 2,true},
-{ ARM::VLD1DUPq32Pseudo_UPD, ARM::VLD1DUPq32_UPD, true, true, true, SingleSpc, 2, 2,true},
-{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, false, SingleSpc, 2, 8,true},
-{ ARM::VLD1DUPq8Pseudo_UPD, ARM::VLD1DUPq8_UPD, true, true, true, SingleSpc, 2, 8,true},
+{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, false, SingleSpc, 2, 4,false},
+{ ARM::VLD1DUPq16PseudoWB_fixed, ARM::VLD1DUPq16wb_fixed, true, true, true, SingleSpc, 2, 4,false},
+{ ARM::VLD1DUPq16PseudoWB_register, ARM::VLD1DUPq16wb_register, true, true, true, SingleSpc, 2, 4,false},
+{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, false, SingleSpc, 2, 2,false},
+{ ARM::VLD1DUPq32PseudoWB_fixed, ARM::VLD1DUPq32wb_fixed, true, true, false, SingleSpc, 2, 2,false},
+{ ARM::VLD1DUPq32PseudoWB_register, ARM::VLD1DUPq32wb_register, true, true, true, SingleSpc, 2, 2,false},
+{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, false, SingleSpc, 2, 8,false},
+{ ARM::VLD1DUPq8PseudoWB_fixed, ARM::VLD1DUPq8wb_fixed, true, true, false, SingleSpc, 2, 8,false},
+{ ARM::VLD1DUPq8PseudoWB_register, ARM::VLD1DUPq8wb_register, true, true, true, SingleSpc, 2, 8,false},
{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true},
{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true},
@@ -177,18 +180,24 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true},
{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, false, SingleSpc, 2, 4 ,false},
-{ ARM::VLD2d16Pseudo_UPD, ARM::VLD2d16_UPD, true, true, true, SingleSpc, 2, 4 ,false},
+{ ARM::VLD2d16PseudoWB_fixed, ARM::VLD2d16wb_fixed, true, true, false, SingleSpc, 2, 4 ,false},
+{ ARM::VLD2d16PseudoWB_register, ARM::VLD2d16wb_register, true, true, true, SingleSpc, 2, 4 ,false},
{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, false, SingleSpc, 2, 2 ,false},
-{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, true, SingleSpc, 2, 2 ,false},
+{ ARM::VLD2d32PseudoWB_fixed, ARM::VLD2d32wb_fixed, true, true, false, SingleSpc, 2, 2 ,false},
+{ ARM::VLD2d32PseudoWB_register, ARM::VLD2d32wb_register, true, true, true, SingleSpc, 2, 2 ,false},
{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, false, SingleSpc, 2, 8 ,false},
-{ ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, true, SingleSpc, 2, 8 ,false},
+{ ARM::VLD2d8PseudoWB_fixed, ARM::VLD2d8wb_fixed, true, true, false, SingleSpc, 2, 8 ,false},
+{ ARM::VLD2d8PseudoWB_register, ARM::VLD2d8wb_register, true, true, true, SingleSpc, 2, 8 ,false},
{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, false, SingleSpc, 4, 4 ,false},
-{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, true, SingleSpc, 4, 4 ,false},
+{ ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4 ,false},
+{ ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4 ,false},
{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, false, SingleSpc, 4, 2 ,false},
-{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, true, SingleSpc, 4, 2 ,false},
+{ ARM::VLD2q32PseudoWB_fixed, ARM::VLD2q32wb_fixed, true, true, false, SingleSpc, 4, 2 ,false},
+{ ARM::VLD2q32PseudoWB_register, ARM::VLD2q32wb_register, true, true, true, SingleSpc, 4, 2 ,false},
{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, false, SingleSpc, 4, 8 ,false},
-{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, true, SingleSpc, 4, 8 ,false},
+{ ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q8wb_fixed, true, true, false, SingleSpc, 4, 8 ,false},
+{ ARM::VLD2q8PseudoWB_register, ARM::VLD2q8wb_register, true, true, true, SingleSpc, 4, 8 ,false},
{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, false, SingleSpc, 3, 4,true},
{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, true, SingleSpc, 3, 4,true},
@@ -267,10 +276,12 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, false, EvenDblSpc, 1, 8 ,true},
{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, true, EvenDblSpc, 1, 8 ,true},
-{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,true},
-{ ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, true, SingleSpc, 4, 1 ,true},
-{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,true},
-{ ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, true, SingleSpc, 3, 1 ,true},
+{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,false},
+{ ARM::VST1d64QPseudoWB_fixed, ARM::VST1d64Qwb_fixed, false, true, false, SingleSpc, 4, 1 ,false},
+{ ARM::VST1d64QPseudoWB_register, ARM::VST1d64Qwb_register, false, true, true, SingleSpc, 4, 1 ,false},
+{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,false},
+{ ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1 ,false},
+{ ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1 ,false},
{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, false, SingleSpc, 2, 4 ,false},
{ ARM::VST1q16PseudoWB_fixed, ARM::VST1q16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false},
@@ -296,19 +307,25 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2,true},
{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true},
-{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, false, SingleSpc, 2, 4 ,true},
-{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, true, SingleSpc, 2, 4 ,true},
-{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, false, SingleSpc, 2, 2 ,true},
-{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, true, SingleSpc, 2, 2 ,true},
-{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, false, SingleSpc, 2, 8 ,true},
-{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, true, SingleSpc, 2, 8 ,true},
-
-{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,true},
-{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, true, SingleSpc, 4, 4 ,true},
-{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,true},
-{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, true, SingleSpc, 4, 2 ,true},
-{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,true},
-{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, true, SingleSpc, 4, 8 ,true},
+{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, false, SingleSpc, 2, 4 ,false},
+{ ARM::VST2d16PseudoWB_fixed, ARM::VST2d16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false},
+{ ARM::VST2d16PseudoWB_register, ARM::VST2d16wb_register, false, true, true, SingleSpc, 2, 4 ,false},
+{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, false, SingleSpc, 2, 2 ,false},
+{ ARM::VST2d32PseudoWB_fixed, ARM::VST2d32wb_fixed, false, true, true, SingleSpc, 2, 2 ,false},
+{ ARM::VST2d32PseudoWB_register, ARM::VST2d32wb_register, false, true, true, SingleSpc, 2, 2 ,false},
+{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, false, SingleSpc, 2, 8 ,false},
+{ ARM::VST2d8PseudoWB_fixed, ARM::VST2d8wb_fixed, false, true, false, SingleSpc, 2, 8 ,false},
+{ ARM::VST2d8PseudoWB_register, ARM::VST2d8wb_register, false, true, true, SingleSpc, 2, 8 ,false},
+
+{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q32PseudoWB_fixed, ARM::VST2q32wb_fixed, false, true, false, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q32PseudoWB_register, ARM::VST2q32wb_register, false, true, true, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,false},
+{ ARM::VST2q8PseudoWB_fixed, ARM::VST2q8wb_fixed, false, true, false, SingleSpc, 4, 8 ,false},
+{ ARM::VST2q8PseudoWB_register, ARM::VST2q8wb_register, false, true, true, SingleSpc, 4, 8 ,false},
{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4 ,true},
{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4 ,true},
@@ -620,7 +637,7 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
/// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ
/// register operands to real instructions with D register operands.
void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
- unsigned Opc, bool IsExt, unsigned NumRegs) {
+ unsigned Opc, bool IsExt) {
MachineInstr &MI = *MBBI;
MachineBasicBlock &MBB = *MI.getParent();
@@ -636,11 +653,7 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
unsigned D0, D1, D2, D3;
GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3);
- MIB.addReg(D0).addReg(D1);
- if (NumRegs > 2)
- MIB.addReg(D2);
- if (NumRegs > 3)
- MIB.addReg(D3);
+ MIB.addReg(D0);
// Copy the other source register operand.
MIB.addOperand(MI.getOperand(OpIdx++));
@@ -1090,12 +1103,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VLD2q8Pseudo:
case ARM::VLD2q16Pseudo:
case ARM::VLD2q32Pseudo:
- case ARM::VLD2d8Pseudo_UPD:
- case ARM::VLD2d16Pseudo_UPD:
- case ARM::VLD2d32Pseudo_UPD:
- case ARM::VLD2q8Pseudo_UPD:
- case ARM::VLD2q16Pseudo_UPD:
- case ARM::VLD2q32Pseudo_UPD:
+ case ARM::VLD2d8PseudoWB_fixed:
+ case ARM::VLD2d16PseudoWB_fixed:
+ case ARM::VLD2d32PseudoWB_fixed:
+ case ARM::VLD2q8PseudoWB_fixed:
+ case ARM::VLD2q16PseudoWB_fixed:
+ case ARM::VLD2q32PseudoWB_fixed:
+ case ARM::VLD2d8PseudoWB_register:
+ case ARM::VLD2d16PseudoWB_register:
+ case ARM::VLD2d32PseudoWB_register:
+ case ARM::VLD2q8PseudoWB_register:
+ case ARM::VLD2q16PseudoWB_register:
+ case ARM::VLD2q32PseudoWB_register:
case ARM::VLD3d8Pseudo:
case ARM::VLD3d16Pseudo:
case ARM::VLD3d32Pseudo:
@@ -1131,9 +1150,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VLD1DUPq8Pseudo:
case ARM::VLD1DUPq16Pseudo:
case ARM::VLD1DUPq32Pseudo:
- case ARM::VLD1DUPq8Pseudo_UPD:
- case ARM::VLD1DUPq16Pseudo_UPD:
- case ARM::VLD1DUPq32Pseudo_UPD:
+ case ARM::VLD1DUPq8PseudoWB_fixed:
+ case ARM::VLD1DUPq16PseudoWB_fixed:
+ case ARM::VLD1DUPq32PseudoWB_fixed:
+ case ARM::VLD1DUPq8PseudoWB_register:
+ case ARM::VLD1DUPq16PseudoWB_register:
+ case ARM::VLD1DUPq32PseudoWB_register:
case ARM::VLD2DUPd8Pseudo:
case ARM::VLD2DUPd16Pseudo:
case ARM::VLD2DUPd32Pseudo:
@@ -1173,12 +1195,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VST2q8Pseudo:
case ARM::VST2q16Pseudo:
case ARM::VST2q32Pseudo:
- case ARM::VST2d8Pseudo_UPD:
- case ARM::VST2d16Pseudo_UPD:
- case ARM::VST2d32Pseudo_UPD:
- case ARM::VST2q8Pseudo_UPD:
- case ARM::VST2q16Pseudo_UPD:
- case ARM::VST2q32Pseudo_UPD:
+ case ARM::VST2d8PseudoWB_fixed:
+ case ARM::VST2d16PseudoWB_fixed:
+ case ARM::VST2d32PseudoWB_fixed:
+ case ARM::VST2q8PseudoWB_fixed:
+ case ARM::VST2q16PseudoWB_fixed:
+ case ARM::VST2q32PseudoWB_fixed:
+ case ARM::VST2d8PseudoWB_register:
+ case ARM::VST2d16PseudoWB_register:
+ case ARM::VST2d32PseudoWB_register:
+ case ARM::VST2q8PseudoWB_register:
+ case ARM::VST2q16PseudoWB_register:
+ case ARM::VST2q32PseudoWB_register:
case ARM::VST3d8Pseudo:
case ARM::VST3d16Pseudo:
case ARM::VST3d32Pseudo:
@@ -1186,7 +1214,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VST3d8Pseudo_UPD:
case ARM::VST3d16Pseudo_UPD:
case ARM::VST3d32Pseudo_UPD:
- case ARM::VST1d64TPseudo_UPD:
+ case ARM::VST1d64TPseudoWB_fixed:
+ case ARM::VST1d64TPseudoWB_register:
case ARM::VST3q8Pseudo_UPD:
case ARM::VST3q16Pseudo_UPD:
case ARM::VST3q32Pseudo_UPD:
@@ -1203,7 +1232,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VST4d8Pseudo_UPD:
case ARM::VST4d16Pseudo_UPD:
case ARM::VST4d32Pseudo_UPD:
- case ARM::VST1d64QPseudo_UPD:
+ case ARM::VST1d64QPseudoWB_fixed:
+ case ARM::VST1d64QPseudoWB_register:
case ARM::VST4q8Pseudo_UPD:
case ARM::VST4q16Pseudo_UPD:
case ARM::VST4q32Pseudo_UPD:
@@ -1291,12 +1321,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
ExpandLaneOp(MBBI);
return true;
- case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); return true;
- case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); return true;
- case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); return true;
- case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); return true;
- case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); return true;
- case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); return true;
+ case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false); return true;
+ case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true;
+ case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true;
+ case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true); return true;
+ case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
+ case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true;
}
return false;
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 9bae422..a98dfc3 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -178,10 +178,12 @@ class ARMFastISel : public FastISel {
bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool isZExt);
- bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, bool isZExt,
- bool allocReg);
+ bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
+ unsigned Alignment = 0, bool isZExt = true,
+ bool allocReg = true);
- bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr);
+ bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
+ unsigned Alignment = 0);
bool ARMComputeAddress(const Value *Obj, Address &Addr);
void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3);
bool ARMIsMemCpySmall(uint64_t Len);
@@ -227,8 +229,7 @@ class ARMFastISel : public FastISel {
// we don't care about implicit defs here, just places we'll need to add a
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.hasOptionalDef())
+ if (!MI->hasOptionalDef())
return false;
// Look to see if our OptionalDef is defining CPSR or CCR.
@@ -702,7 +703,7 @@ unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
TargetRegisterClass* RC = TLI.getRegClassFor(VT);
unsigned ResultReg = createResultReg(RC);
unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg)
.addFrameIndex(SI->second)
.addImm(0));
@@ -898,7 +899,7 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) {
ARM::GPRRegisterClass;
unsigned ResultReg = createResultReg(RC);
unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg)
.addFrameIndex(Addr.Base.FI)
.addImm(0));
@@ -937,7 +938,8 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
// Now add the rest of the operands.
MIB.addFrameIndex(FI);
- // ARM halfword load/stores and signed byte loads need an additional operand.
+ // ARM halfword load/stores and signed byte loads need an additional
+ // operand.
if (useAM3) {
signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
MIB.addReg(0);
@@ -950,7 +952,8 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
// Now add the rest of the operands.
MIB.addReg(Addr.Base.Reg);
- // ARM halfword load/stores and signed byte loads need an additional operand.
+ // ARM halfword load/stores and signed byte loads need an additional
+ // operand.
if (useAM3) {
signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
MIB.addReg(0);
@@ -963,10 +966,11 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
}
bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
- bool isZExt = true, bool allocReg = true) {
+ unsigned Alignment, bool isZExt, bool allocReg) {
assert(VT.isSimple() && "Non-simple types are invalid here!");
unsigned Opc;
bool useAM3 = false;
+ bool needVMOV = false;
TargetRegisterClass *RC;
switch (VT.getSimpleVT().SimpleTy) {
// This is mostly going to be Neon/vector support.
@@ -1012,10 +1016,25 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
RC = ARM::GPRRegisterClass;
break;
case MVT::f32:
- Opc = ARM::VLDRS;
- RC = TLI.getRegClassFor(VT);
+ if (!Subtarget->hasVFP2()) return false;
+ // Unaligned loads need special handling. Floats require word-alignment.
+ if (Alignment && Alignment < 4) {
+ needVMOV = true;
+ VT = MVT::i32;
+ Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
+ RC = ARM::GPRRegisterClass;
+ } else {
+ Opc = ARM::VLDRS;
+ RC = TLI.getRegClassFor(VT);
+ }
break;
case MVT::f64:
+ if (!Subtarget->hasVFP2()) return false;
+ // FIXME: Unaligned loads need special handling. Doublewords require
+ // word-alignment.
+ if (Alignment && Alignment < 4)
+ return false;
+
Opc = ARM::VLDRD;
RC = TLI.getRegClassFor(VT);
break;
@@ -1030,6 +1049,16 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg);
AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);
+
+ // If we had an unaligned load of a float, we've converted it to a regular
+ // load. Now we must move the value from the GPR to the FP register.
+ if (needVMOV) {
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVSR), MoveReg)
+ .addReg(ResultReg));
+ ResultReg = MoveReg;
+ }
return true;
}
@@ -1048,12 +1077,14 @@ bool ARMFastISel::SelectLoad(const Instruction *I) {
if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
unsigned ResultReg;
- if (!ARMEmitLoad(VT, ResultReg, Addr)) return false;
+ if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
+ return false;
UpdateValueMap(I, ResultReg);
return true;
}
-bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
+bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
+ unsigned Alignment) {
unsigned StrOpc;
bool useAM3 = false;
switch (VT.getSimpleVT().SimpleTy) {
@@ -1101,10 +1132,26 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
break;
case MVT::f32:
if (!Subtarget->hasVFP2()) return false;
- StrOpc = ARM::VSTRS;
+ // Unaligned stores need special handling. Floats require word-alignment.
+ if (Alignment && Alignment < 4) {
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVRS), MoveReg)
+ .addReg(SrcReg));
+ SrcReg = MoveReg;
+ VT = MVT::i32;
+ StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
+ } else {
+ StrOpc = ARM::VSTRS;
+ }
break;
case MVT::f64:
if (!Subtarget->hasVFP2()) return false;
+ // FIXME: Unaligned stores need special handling. Doublewords require
+ // word-alignment.
+ if (Alignment && Alignment < 4)
+ return false;
+
StrOpc = ARM::VSTRD;
break;
}
@@ -1141,7 +1188,8 @@ bool ARMFastISel::SelectStore(const Instruction *I) {
if (!ARMComputeAddress(I->getOperand(1), Addr))
return false;
- if (!ARMEmitStore(VT, SrcReg, Addr)) return false;
+ if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
+ return false;
return true;
}
@@ -1360,7 +1408,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
unsigned SrcReg1 = getRegForValue(Src1Value);
if (SrcReg1 == 0) return false;
- unsigned SrcReg2;
+ unsigned SrcReg2 = 0;
if (!UseImm) {
SrcReg2 = getRegForValue(Src2Value);
if (SrcReg2 == 0) return false;
@@ -1577,7 +1625,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
(ARM_AM::getSOImmVal(Imm) != -1);
}
- unsigned Op2Reg;
+ unsigned Op2Reg = 0;
if (!UseImm) {
Op2Reg = getRegForValue(I->getOperand(2));
if (Op2Reg == 0) return false;
@@ -1716,7 +1764,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
// Use target triple & subtarget features to do actual dispatch.
if (Subtarget->isAAPCS_ABI()) {
if (Subtarget->hasVFP2() &&
- FloatABIType == FloatABI::Hard)
+ TM.Options.FloatABIType == FloatABI::Hard)
return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
else
return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
@@ -1765,21 +1813,23 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
switch (VA.getLocInfo()) {
case CCValAssign::Full: break;
case CCValAssign::SExt: {
- EVT DestVT = VA.getLocVT();
+ MVT DestVT = VA.getLocVT();
unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT,
/*isZExt*/false);
assert (ResultReg != 0 && "Failed to emit a sext");
Arg = ResultReg;
+ ArgVT = DestVT;
break;
}
case CCValAssign::AExt:
// Intentional fall-through. Handle AExt and ZExt.
case CCValAssign::ZExt: {
- EVT DestVT = VA.getLocVT();
+ MVT DestVT = VA.getLocVT();
unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT,
/*isZExt*/true);
assert (ResultReg != 0 && "Failed to emit a sext");
Arg = ResultReg;
+ ArgVT = DestVT;
break;
}
case CCValAssign::BCvt: {
@@ -2456,7 +2506,7 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
unsigned ResultReg = MI->getOperand(0).getReg();
- if (!ARMEmitLoad(VT, ResultReg, Addr, isZExt, false))
+ if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
return false;
MI->eraseFromParent();
return true;
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 2d1de6f..06944b1 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -37,7 +37,8 @@ bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
// Always eliminate non-leaf frame pointers.
- return ((DisableFramePointerElim(MF) && MFI->hasCalls()) ||
+ return ((MF.getTarget().Options.DisableFramePointerElim(MF) &&
+ MFI->hasCalls()) ||
RegInfo->needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken());
@@ -309,8 +310,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- assert(MBBI->getDesc().isReturn() &&
- "Can only insert epilog into returning blocks");
+ assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index 787f6a2..a5fd15b 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -21,7 +21,7 @@ static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
// FIXME: Detect integer instructions properly.
const MCInstrDesc &MCID = MI->getDesc();
unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
- if (MCID.mayStore())
+ if (MI->mayStore())
return false;
unsigned Opcode = MCID.getOpcode();
if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
@@ -38,9 +38,6 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
MachineInstr *MI = SU->getInstr();
if (!MI->isDebugValue()) {
- if (ITBlockSize && MI != ITBlockMIs[ITBlockSize-1])
- return Hazard;
-
// Look for special VMLA / VMLS hazards. A VMUL / VADD / VSUB following
// a VMLA / VMLS will cause 4 cycle stall.
const MCInstrDesc &MCID = MI->getDesc();
@@ -48,9 +45,9 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
MachineInstr *DefMI = LastMI;
const MCInstrDesc &LastMCID = LastMI->getDesc();
// Skip over one non-VFP / NEON instruction.
- if (!LastMCID.isBarrier() &&
+ if (!LastMI->isBarrier() &&
// On A9, AGU and NEON/FPU are muxed.
- !(STI.isCortexA9() && (LastMCID.mayLoad() || LastMCID.mayStore())) &&
+ !(STI.isCortexA9() && (LastMI->mayLoad() || LastMI->mayStore())) &&
(LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
MachineBasicBlock::iterator I = LastMI;
if (I != LastMI->getParent()->begin()) {
@@ -76,30 +73,11 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
void ARMHazardRecognizer::Reset() {
LastMI = 0;
FpMLxStalls = 0;
- ITBlockSize = 0;
ScoreboardHazardRecognizer::Reset();
}
void ARMHazardRecognizer::EmitInstruction(SUnit *SU) {
MachineInstr *MI = SU->getInstr();
- unsigned Opcode = MI->getOpcode();
- if (ITBlockSize) {
- --ITBlockSize;
- } else if (Opcode == ARM::t2IT) {
- unsigned Mask = MI->getOperand(1).getImm();
- unsigned NumTZ = CountTrailingZeros_32(Mask);
- assert(NumTZ <= 3 && "Invalid IT mask!");
- ITBlockSize = 4 - NumTZ;
- MachineBasicBlock::iterator I = MI;
- for (unsigned i = 0; i < ITBlockSize; ++i) {
- // Advance to the next instruction, skipping any dbg_value instructions.
- do {
- ++I;
- } while (I->isDebugValue());
- ITBlockMIs[ITBlockSize-1-i] = &*I;
- }
- }
-
if (!MI->isDebugValue()) {
LastMI = MI;
FpMLxStalls = 0;
diff --git a/lib/Target/ARM/ARMHazardRecognizer.h b/lib/Target/ARM/ARMHazardRecognizer.h
index 2bc218d..98bfc4c 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.h
+++ b/lib/Target/ARM/ARMHazardRecognizer.h
@@ -23,6 +23,10 @@ class ARMBaseRegisterInfo;
class ARMSubtarget;
class MachineInstr;
+/// ARMHazardRecognizer handles special constraints that are not expressed in
+/// the scheduling itinerary. This is only used during postRA scheduling. The
+/// ARM preRA scheduler uses an unspecialized instance of the
+/// ScoreboardHazardRecognizer.
class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
const ARMBaseInstrInfo &TII;
const ARMBaseRegisterInfo &TRI;
@@ -30,8 +34,6 @@ class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
MachineInstr *LastMI;
unsigned FpMLxStalls;
- unsigned ITBlockSize; // No. of MIs in current IT block yet to be scheduled.
- MachineInstr *ITBlockMIs[4];
public:
ARMHazardRecognizer(const InstrItineraryData *ItinData,
@@ -40,7 +42,7 @@ public:
const ARMSubtarget &sti,
const ScheduleDAG *DAG) :
ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), TII(tii),
- TRI(tri), STI(sti), LastMI(0), ITBlockSize(0) {}
+ TRI(tri), STI(sti), LastMI(0) {}
virtual HazardType getHazardType(SUnit *SU, int Stalls);
virtual void Reset();
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index bc8588f..7473141 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1579,6 +1579,22 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
case ARM::VST1q16PseudoWB_fixed: return ARM::VST1q16PseudoWB_register;
case ARM::VST1q32PseudoWB_fixed: return ARM::VST1q32PseudoWB_register;
case ARM::VST1q64PseudoWB_fixed: return ARM::VST1q64PseudoWB_register;
+ case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
+ case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
+
+ case ARM::VLD2d8PseudoWB_fixed: return ARM::VLD2d8PseudoWB_register;
+ case ARM::VLD2d16PseudoWB_fixed: return ARM::VLD2d16PseudoWB_register;
+ case ARM::VLD2d32PseudoWB_fixed: return ARM::VLD2d32PseudoWB_register;
+ case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
+ case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
+ case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
+
+ case ARM::VST2d8PseudoWB_fixed: return ARM::VST2d8PseudoWB_register;
+ case ARM::VST2d16PseudoWB_fixed: return ARM::VST2d16PseudoWB_register;
+ case ARM::VST2d32PseudoWB_fixed: return ARM::VST2d32PseudoWB_register;
+ case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
+ case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
+ case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
}
return Opc; // If not one we handle, return it unchanged.
}
@@ -1646,13 +1662,13 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
- // FIXME: VLD1 fixed increment doesn't need Reg0. Remove the reg0
+ // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
// case entirely when the rest are updated to that form, too.
- if (NumVecs == 1 && !isa<ConstantSDNode>(Inc.getNode()))
+ if ((NumVecs == 1 || NumVecs == 2) && !isa<ConstantSDNode>(Inc.getNode()))
Opc = getVLDSTRegisterUpdateOpcode(Opc);
- // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
+ // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
// check for that explicitly too. Horribly hacky, but temporary.
- if ((NumVecs != 1 && Opc != ARM::VLD1q64PseudoWB_fixed) ||
+ if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64PseudoWB_fixed) ||
!isa<ConstantSDNode>(Inc.getNode()))
Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
}
@@ -1796,9 +1812,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
- // FIXME: VST1 fixed increment doesn't need Reg0. Remove the reg0
+ // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
// case entirely when the rest are updated to that form, too.
- if (NumVecs == 1 && !isa<ConstantSDNode>(Inc.getNode()))
+ if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
Opc = getVLDSTRegisterUpdateOpcode(Opc);
// We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
// check for that explicitly too. Horribly hacky, but temporary.
@@ -2810,10 +2826,13 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VLD2_UPD: {
- unsigned DOpcodes[] = { ARM::VLD2d8Pseudo_UPD, ARM::VLD2d16Pseudo_UPD,
- ARM::VLD2d32Pseudo_UPD, ARM::VLD1q64PseudoWB_fixed};
- unsigned QOpcodes[] = { ARM::VLD2q8Pseudo_UPD, ARM::VLD2q16Pseudo_UPD,
- ARM::VLD2q32Pseudo_UPD };
+ unsigned DOpcodes[] = { ARM::VLD2d8PseudoWB_fixed,
+ ARM::VLD2d16PseudoWB_fixed,
+ ARM::VLD2d32PseudoWB_fixed,
+ ARM::VLD1q64PseudoWB_fixed};
+ unsigned QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
+ ARM::VLD2q16PseudoWB_fixed,
+ ARM::VLD2q32PseudoWB_fixed };
return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0);
}
@@ -2876,16 +2895,19 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VST2_UPD: {
- unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD,
- ARM::VST2d32Pseudo_UPD, ARM::VST1q64PseudoWB_fixed};
- unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD,
- ARM::VST2q32Pseudo_UPD };
+ unsigned DOpcodes[] = { ARM::VST2d8PseudoWB_fixed,
+ ARM::VST2d16PseudoWB_fixed,
+ ARM::VST2d32PseudoWB_fixed,
+ ARM::VST1q64PseudoWB_fixed};
+ unsigned QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
+ ARM::VST2q16PseudoWB_fixed,
+ ARM::VST2q32PseudoWB_fixed };
return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
}
case ARMISD::VST3_UPD: {
unsigned DOpcodes[] = { ARM::VST3d8Pseudo_UPD, ARM::VST3d16Pseudo_UPD,
- ARM::VST3d32Pseudo_UPD, ARM::VST1d64TPseudo_UPD };
+ ARM::VST3d32Pseudo_UPD,ARM::VST1d64TPseudoWB_fixed};
unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
ARM::VST3q16Pseudo_UPD,
ARM::VST3q32Pseudo_UPD };
@@ -2897,7 +2919,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ARMISD::VST4_UPD: {
unsigned DOpcodes[] = { ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD,
- ARM::VST4d32Pseudo_UPD, ARM::VST1d64QPseudo_UPD };
+ ARM::VST4d32Pseudo_UPD,ARM::VST1d64QPseudoWB_fixed};
unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
ARM::VST4q16Pseudo_UPD,
ARM::VST4q32Pseudo_UPD };
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 8c4c06f..c6c1f5b 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -72,7 +72,7 @@ ARMInterworking("arm-interworking", cl::Hidden,
cl::desc("Enable / disable ARM interworking (for debugging only)"),
cl::init(true));
-namespace llvm {
+namespace {
class ARMCCState : public CCState {
public:
ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
@@ -432,7 +432,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
else
addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+ !Subtarget->isThumb1Only()) {
addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
if (!Subtarget->isFPOnlySP())
addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
@@ -467,13 +468,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// v2f64 is legal so that QR subregs can be extracted as f64 elements, but
// neither Neon nor VFP support any arithmetic operations on it.
+ // The same applies to v4f32, but keep in mind that vadd, vsub and vmul are
+ // natively supported for v4f32.
setOperationAction(ISD::FADD, MVT::v2f64, Expand);
setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+ // FIXME: Code duplication: FDIV and FREM are always expanded; see the
+ // ARMTargetLowering::addTypeForNEON method for details.
setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
setOperationAction(ISD::FREM, MVT::v2f64, Expand);
+ // FIXME: Create unittest.
+ // In other words, find a way to get "copysign" to appear in a DAG with
+ // vector operands.
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
+ // FIXME: Code duplication: SETCC has a custom operation action; see the
+ // ARMTargetLowering::addTypeForNEON method for details.
setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
+ // FIXME: Create unittest for FNEG and for FABS.
setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
setOperationAction(ISD::FABS, MVT::v2f64, Expand);
setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
@@ -486,11 +497,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
+ // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
+
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
// Neon does not support some operations on v1i64 and v2i64 types.
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
@@ -586,6 +609,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ // These just redirect to CTTZ and CTLZ on ARM.
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand);
+
// Only ARMv6 has BSWAP.
if (!Subtarget->hasV6Ops())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
@@ -674,7 +701,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+ !Subtarget->isThumb1Only()) {
// Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
// iff target supports vfp2.
setOperationAction(ISD::BITCAST, MVT::i64, Custom);
@@ -712,7 +740,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+ !Subtarget->isThumb1Only()) {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
}
@@ -723,7 +752,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FMA, MVT::f32, Expand);
// Various VFP goodness
- if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
// int <-> fp are custom expanded into bit_convert + ARMISD ops.
if (Subtarget->hasVFP2()) {
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
@@ -751,7 +780,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setStackPointerRegisterToSaveRestore(ARM::SP);
- if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
+ if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
+ !Subtarget->hasVFP2())
setSchedulingPreference(Sched::RegPressure);
else
setSchedulingPreference(Sched::Hybrid);
@@ -1092,7 +1122,8 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
if (!Subtarget->isAAPCS_ABI())
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
else if (Subtarget->hasVFP2() &&
- FloatABIType == FloatABI::Hard && !isVarArg)
+ getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
+ !isVarArg)
return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
}
@@ -2951,7 +2982,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
- if (UnsafeFPMath &&
+ if (getTargetMachine().Options.UnsafeFPMath &&
(CC == ISD::SETEQ || CC == ISD::SETOEQ ||
CC == ISD::SETNE || CC == ISD::SETUNE)) {
SDValue Result = OptimizeVFPBrcond(Op, DAG);
@@ -3978,9 +4009,8 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
}
// Use vmov.f32 to materialize other v2f32 and v4f32 splats.
- if (VT == MVT::v2f32 || VT == MVT::v4f32) {
- ConstantFPSDNode *C = cast<ConstantFPSDNode>(Op.getOperand(0));
- int ImmVal = ARM_AM::getFP32Imm(C->getValueAPF());
+ if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
+ int ImmVal = ARM_AM::getFP32Imm(SplatBits);
if (ImmVal != -1) {
SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
@@ -6010,7 +6040,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
// executed.
for (MachineBasicBlock::reverse_iterator
II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
- if (!II->getDesc().isCall()) continue;
+ if (!II->isCall()) continue;
DenseMap<unsigned, bool> DefRegs;
for (MachineInstr::mop_iterator
@@ -6421,13 +6451,13 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SDNode *Node) const {
- const MCInstrDesc *MCID = &MI->getDesc();
- if (!MCID->hasPostISelHook()) {
+ if (!MI->hasPostISelHook()) {
assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
"Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'");
return;
}
+ const MCInstrDesc *MCID = &MI->getDesc();
// Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
// RSC. Coming out of isel, they have an implicit CPSR def, but the optional
// operand is still set to noreg. If needed, set the optional operand's
@@ -6454,7 +6484,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// Any ARM instruction that sets the 's' bit should specify an optional
// "cc_out" operand in the last operand position.
- if (!MCID->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
+ if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
assert(!NewOpc && "Optional cc_out operand required");
return;
}
@@ -7948,7 +7978,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
// will return -0, so vmin can only be used for unsafe math or if one of
// the operands is known to be nonzero.
if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
- !UnsafeFPMath &&
+ !DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
@@ -7970,7 +8000,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
// will return +0, so vmax can only be used for unsafe math or if one of
// the operands is known to be nonzero.
if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
- !UnsafeFPMath &&
+ !DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 6940156..80f3773 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -201,21 +201,29 @@ def msr_mask : Operand<i32> {
// 16 imm6<5:4> = '01', 16 - <imm> is encoded in imm6<3:0>
// 32 imm6<5> = '1', 32 - <imm> is encoded in imm6<4:0>
// 64 64 - <imm> is encoded in imm6<5:0>
+def shr_imm8_asm_operand : ImmAsmOperand { let Name = "ShrImm8"; }
def shr_imm8 : Operand<i32> {
let EncoderMethod = "getShiftRight8Imm";
let DecoderMethod = "DecodeShiftRight8Imm";
+ let ParserMatchClass = shr_imm8_asm_operand;
}
+def shr_imm16_asm_operand : ImmAsmOperand { let Name = "ShrImm16"; }
def shr_imm16 : Operand<i32> {
let EncoderMethod = "getShiftRight16Imm";
let DecoderMethod = "DecodeShiftRight16Imm";
+ let ParserMatchClass = shr_imm16_asm_operand;
}
+def shr_imm32_asm_operand : ImmAsmOperand { let Name = "ShrImm32"; }
def shr_imm32 : Operand<i32> {
let EncoderMethod = "getShiftRight32Imm";
let DecoderMethod = "DecodeShiftRight32Imm";
+ let ParserMatchClass = shr_imm32_asm_operand;
}
+def shr_imm64_asm_operand : ImmAsmOperand { let Name = "ShrImm64"; }
def shr_imm64 : Operand<i32> {
let EncoderMethod = "getShiftRight64Imm";
let DecoderMethod = "DecodeShiftRight64Imm";
+ let ParserMatchClass = shr_imm64_asm_operand;
}
//===----------------------------------------------------------------------===//
@@ -231,6 +239,14 @@ class VFP2InstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit>, Requires<[HasVFP2]>;
class VFP3InstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit>, Requires<[HasVFP3]>;
+class NEONInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[HasNEON]>;
+
+
+class VFP2MnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>,
+ Requires<[HasVFP2]>;
+class NEONMnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>,
+ Requires<[HasNEON]>;
//===----------------------------------------------------------------------===//
// ARM Instruction templates.
@@ -1994,73 +2010,111 @@ class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> {
// VFP/NEON Instruction aliases for type suffices.
class VFPDataTypeInstAlias<string opc, string dt, string asm, dag Result> :
- InstAlias<!strconcat(opc, dt, asm), Result>;
-multiclass VFPDT8ReqInstAlias<string opc, string asm, dag Result> {
- def I8 : VFPDataTypeInstAlias<opc, ".i8", asm, Result>;
- def S8 : VFPDataTypeInstAlias<opc, ".s8", asm, Result>;
- def U8 : VFPDataTypeInstAlias<opc, ".u8", asm, Result>;
- def F8 : VFPDataTypeInstAlias<opc, ".p8", asm, Result>;
-}
-// VFPDT8ReqInstAlias plus plain ".8"
-multiclass VFPDT8InstAlias<string opc, string asm, dag Result> {
- def _8 : VFPDataTypeInstAlias<opc, ".8", asm, Result>;
- defm : VFPDT8ReqInstAlias<opc, asm, Result>;
-}
-multiclass VFPDT16ReqInstAlias<string opc, string asm, dag Result> {
- def I16 : VFPDataTypeInstAlias<opc, ".i16", asm, Result>;
- def S16 : VFPDataTypeInstAlias<opc, ".s16", asm, Result>;
- def U16 : VFPDataTypeInstAlias<opc, ".u16", asm, Result>;
- def F16 : VFPDataTypeInstAlias<opc, ".p16", asm, Result>;
-}
-// VFPDT16ReqInstAlias plus plain ".16"
-multiclass VFPDT16InstAlias<string opc, string asm, dag Result> {
- def _16 : VFPDataTypeInstAlias<opc, ".16", asm, Result>;
- defm : VFPDT16ReqInstAlias<opc, asm, Result>;
-}
-multiclass VFPDT32ReqInstAlias<string opc, string asm, dag Result> {
- def I32 : VFPDataTypeInstAlias<opc, ".i32", asm, Result>;
- def S32 : VFPDataTypeInstAlias<opc, ".s32", asm, Result>;
- def U32 : VFPDataTypeInstAlias<opc, ".u32", asm, Result>;
- def F32 : VFPDataTypeInstAlias<opc, ".f32", asm, Result>;
- def F : VFPDataTypeInstAlias<opc, ".f", asm, Result>;
-}
-// VFPDT32ReqInstAlias plus plain ".32"
-multiclass VFPDT32InstAlias<string opc, string asm, dag Result> {
- def _32 : VFPDataTypeInstAlias<opc, ".32", asm, Result>;
- defm : VFPDT32ReqInstAlias<opc, asm, Result>;
-}
-multiclass VFPDT64ReqInstAlias<string opc, string asm, dag Result> {
- def I64 : VFPDataTypeInstAlias<opc, ".i64", asm, Result>;
- def S64 : VFPDataTypeInstAlias<opc, ".s64", asm, Result>;
- def U64 : VFPDataTypeInstAlias<opc, ".u64", asm, Result>;
- def F64 : VFPDataTypeInstAlias<opc, ".f64", asm, Result>;
- def D : VFPDataTypeInstAlias<opc, ".d", asm, Result>;
-}
-// VFPDT64ReqInstAlias plus plain ".64"
-multiclass VFPDT64InstAlias<string opc, string asm, dag Result> {
- def _64 : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
- defm : VFPDT64ReqInstAlias<opc, asm, Result>;
-}
-multiclass VFPDT64NoF64ReqInstAlias<string opc, string asm, dag Result> {
- def I64 : VFPDataTypeInstAlias<opc, ".i64", asm, Result>;
- def S64 : VFPDataTypeInstAlias<opc, ".s64", asm, Result>;
- def U64 : VFPDataTypeInstAlias<opc, ".u64", asm, Result>;
- def D : VFPDataTypeInstAlias<opc, ".d", asm, Result>;
-}
-// VFPDT64ReqInstAlias plus plain ".64"
-multiclass VFPDT64NoF64InstAlias<string opc, string asm, dag Result> {
- def _64 : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
- defm : VFPDT64ReqInstAlias<opc, asm, Result>;
-}
+ InstAlias<!strconcat(opc, dt, "\t", asm), Result>, Requires<[HasVFP2]>;
+
multiclass VFPDTAnyInstAlias<string opc, string asm, dag Result> {
- defm : VFPDT8InstAlias<opc, asm, Result>;
- defm : VFPDT16InstAlias<opc, asm, Result>;
- defm : VFPDT32InstAlias<opc, asm, Result>;
- defm : VFPDT64InstAlias<opc, asm, Result>;
-}
-multiclass VFPDTAnyNoF64InstAlias<string opc, string asm, dag Result> {
- defm : VFPDT8InstAlias<opc, asm, Result>;
- defm : VFPDT16InstAlias<opc, asm, Result>;
- defm : VFPDT32InstAlias<opc, asm, Result>;
- defm : VFPDT64NoF64InstAlias<opc, asm, Result>;
-}
+ def : VFPDataTypeInstAlias<opc, ".8", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".16", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".32", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
+}
+
+// The same alias classes using AsmPseudo instead, for the more complex
+// stuff in NEON that InstAlias can't quite handle.
+// Note that we can't use anonymous defm references here like we can
+// above, as we care about the ultimate instruction enum names generated, unlike
+// for InstAlias defs.
+class NEONDataTypeAsmPseudoInst<string opc, string dt, string asm, dag iops> :
+ AsmPseudoInst<!strconcat(opc, dt, "\t", asm), iops>, Requires<[HasNEON]>;
+multiclass NEONDT8ReqAsmPseudoInst<string opc, string asm, dag iops> {
+ def I8 : NEONDataTypeAsmPseudoInst<opc, ".i8", asm, iops>;
+ def S8 : NEONDataTypeAsmPseudoInst<opc, ".s8", asm, iops>;
+ def U8 : NEONDataTypeAsmPseudoInst<opc, ".u8", asm, iops>;
+ def P8 : NEONDataTypeAsmPseudoInst<opc, ".p8", asm, iops>;
+}
+// NEONDT8ReqAsmPseudoInst plus plain ".8"
+multiclass NEONDT8AsmPseudoInst<string opc, string asm, dag iops> {
+ def _8 : NEONDataTypeAsmPseudoInst<opc, ".8", asm, iops>;
+ defm _ : NEONDT8ReqAsmPseudoInst<opc, asm, iops>;
+}
+multiclass NEONDT16ReqAsmPseudoInst<string opc, string asm, dag iops> {
+ def I16 : NEONDataTypeAsmPseudoInst<opc, ".i16", asm, iops>;
+ def S16 : NEONDataTypeAsmPseudoInst<opc, ".s16", asm, iops>;
+ def U16 : NEONDataTypeAsmPseudoInst<opc, ".u16", asm, iops>;
+ def P16 : NEONDataTypeAsmPseudoInst<opc, ".p16", asm, iops>;
+}
+// NEONDT16ReqAsmPseudoInst plus plain ".16"
+multiclass NEONDT16AsmPseudoInst<string opc, string asm, dag iops> {
+ def _16 : NEONDataTypeAsmPseudoInst<opc, ".16", asm, iops>;
+ defm _ : NEONDT16ReqAsmPseudoInst<opc, asm, iops>;
+}
+multiclass NEONDT32ReqAsmPseudoInst<string opc, string asm, dag iops> {
+ def I32 : NEONDataTypeAsmPseudoInst<opc, ".i32", asm, iops>;
+ def S32 : NEONDataTypeAsmPseudoInst<opc, ".s32", asm, iops>;
+ def U32 : NEONDataTypeAsmPseudoInst<opc, ".u32", asm, iops>;
+ def F32 : NEONDataTypeAsmPseudoInst<opc, ".f32", asm, iops>;
+ def F : NEONDataTypeAsmPseudoInst<opc, ".f", asm, iops>;
+}
+// NEONDT32ReqAsmPseudoInst plus plain ".32"
+multiclass NEONDT32AsmPseudoInst<string opc, string asm, dag iops> {
+ def _32 : NEONDataTypeAsmPseudoInst<opc, ".32", asm, iops>;
+ defm _ : NEONDT32ReqAsmPseudoInst<opc, asm, iops>;
+}
+multiclass NEONDT64ReqAsmPseudoInst<string opc, string asm, dag iops> {
+ def I64 : NEONDataTypeAsmPseudoInst<opc, ".i64", asm, iops>;
+ def S64 : NEONDataTypeAsmPseudoInst<opc, ".s64", asm, iops>;
+ def U64 : NEONDataTypeAsmPseudoInst<opc, ".u64", asm, iops>;
+ def F64 : NEONDataTypeAsmPseudoInst<opc, ".f64", asm, iops>;
+ def D : NEONDataTypeAsmPseudoInst<opc, ".d", asm, iops>;
+}
+// NEONDT64ReqAsmPseudoInst plus plain ".64"
+multiclass NEONDT64AsmPseudoInst<string opc, string asm, dag iops> {
+ def _64 : NEONDataTypeAsmPseudoInst<opc, ".64", asm, iops>;
+ defm _ : NEONDT64ReqAsmPseudoInst<opc, asm, iops>;
+}
+multiclass NEONDT64NoF64ReqAsmPseudoInst<string opc, string asm, dag iops> {
+ def I64 : NEONDataTypeAsmPseudoInst<opc, ".i64", asm, iops>;
+ def S64 : NEONDataTypeAsmPseudoInst<opc, ".s64", asm, iops>;
+ def U64 : NEONDataTypeAsmPseudoInst<opc, ".u64", asm, iops>;
+ def D : NEONDataTypeAsmPseudoInst<opc, ".d", asm, iops>;
+}
+// NEONDT64NoF64ReqAsmPseudoInst plus plain ".64"
+multiclass NEONDT64NoF64AsmPseudoInst<string opc, string asm, dag iops> {
+ def _64 : NEONDataTypeAsmPseudoInst<opc, ".64", asm, iops>;
+ defm _ : NEONDT64ReqAsmPseudoInst<opc, asm, iops>;
+}
+multiclass NEONDTAnyAsmPseudoInst<string opc, string asm, dag iops> {
+ defm _ : NEONDT8AsmPseudoInst<opc, asm, iops>;
+ defm _ : NEONDT16AsmPseudoInst<opc, asm, iops>;
+ defm _ : NEONDT32AsmPseudoInst<opc, asm, iops>;
+ defm _ : NEONDT64AsmPseudoInst<opc, asm, iops>;
+}
+multiclass NEONDTAnyNoF64AsmPseudoInst<string opc, string asm, dag iops> {
+ defm _ : NEONDT8AsmPseudoInst<opc, asm, iops>;
+ defm _ : NEONDT16AsmPseudoInst<opc, asm, iops>;
+ defm _ : NEONDT32AsmPseudoInst<opc, asm, iops>;
+ defm _ : NEONDT64NoF64AsmPseudoInst<opc, asm, iops>;
+}
+
+// Data type suffix token aliases. Implements Table A7-3 in the ARM ARM.
+def : TokenAlias<".s8", ".i8">;
+def : TokenAlias<".u8", ".i8">;
+def : TokenAlias<".s16", ".i16">;
+def : TokenAlias<".u16", ".i16">;
+def : TokenAlias<".s32", ".i32">;
+def : TokenAlias<".u32", ".i32">;
+def : TokenAlias<".s64", ".i64">;
+def : TokenAlias<".u64", ".i64">;
+
+def : TokenAlias<".i8", ".8">;
+def : TokenAlias<".i16", ".16">;
+def : TokenAlias<".i32", ".32">;
+def : TokenAlias<".i64", ".64">;
+
+def : TokenAlias<".p8", ".8">;
+def : TokenAlias<".p16", ".16">;
+
+def : TokenAlias<".f32", ".32">;
+def : TokenAlias<".f64", ".64">;
+def : TokenAlias<".f", ".f32">;
+def : TokenAlias<".d", ".f64">;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index be03924..516a080 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -238,27 +238,23 @@ def so_imm_not_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32);
}]>;
-/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15].
-def imm1_15 : ImmLeaf<i32, [{
- return (int32_t)Imm >= 1 && (int32_t)Imm < 16;
-}]>;
-
/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31].
def imm16_31 : ImmLeaf<i32, [{
return (int32_t)Imm >= 16 && (int32_t)Imm < 32;
}]>;
-def so_imm_neg :
- PatLeaf<(imm), [{
+def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; }
+def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
return ARM_AM::getSOImmVal(-(uint32_t)N->getZExtValue()) != -1;
- }], so_imm_neg_XFORM>;
+ }], so_imm_neg_XFORM> {
+ let ParserMatchClass = so_imm_neg_asmoperand;
+}
// Note: this pattern doesn't require an encoder method and such, as it's
// only used on aliases (Pat<> and InstAlias<>). The actual encoding
-// is handled by the destination instructions, which use t2_so_imm.
+// is handled by the destination instructions, which use so_imm.
def so_imm_not_asmoperand : AsmOperandClass { let Name = "ARMSOImmNot"; }
-def so_imm_not :
- Operand<i32>, PatLeaf<(imm), [{
+def so_imm_not : Operand<i32>, PatLeaf<(imm), [{
return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1;
}], so_imm_not_XFORM> {
let ParserMatchClass = so_imm_not_asmoperand;
@@ -512,6 +508,14 @@ def arm_i32imm : PatLeaf<(imm), [{
return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
}]>;
+/// imm0_1 predicate - Immediate in the range [0,1].
+def Imm0_1AsmOperand: ImmAsmOperand { let Name = "Imm0_1"; }
+def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; }
+
+/// imm0_3 predicate - Immediate in the range [0,3].
+def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; }
+def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; }
+
/// imm0_7 predicate - Immediate in the range [0,7].
def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; }
def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
@@ -520,6 +524,42 @@ def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm0_7AsmOperand;
}
+/// imm8 predicate - Immediate is exactly 8.
+def Imm8AsmOperand: ImmAsmOperand { let Name = "Imm8"; }
+def imm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 8; }]> {
+ let ParserMatchClass = Imm8AsmOperand;
+}
+
+/// imm16 predicate - Immediate is exactly 16.
+def Imm16AsmOperand: ImmAsmOperand { let Name = "Imm16"; }
+def imm16 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 16; }]> {
+ let ParserMatchClass = Imm16AsmOperand;
+}
+
+/// imm32 predicate - Immediate is exactly 32.
+def Imm32AsmOperand: ImmAsmOperand { let Name = "Imm32"; }
+def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> {
+ let ParserMatchClass = Imm32AsmOperand;
+}
+
+/// imm1_7 predicate - Immediate in the range [1,7].
+def Imm1_7AsmOperand: ImmAsmOperand { let Name = "Imm1_7"; }
+def imm1_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 8; }]> {
+ let ParserMatchClass = Imm1_7AsmOperand;
+}
+
+/// imm1_15 predicate - Immediate in the range [1,15].
+def Imm1_15AsmOperand: ImmAsmOperand { let Name = "Imm1_15"; }
+def imm1_15 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 16; }]> {
+ let ParserMatchClass = Imm1_15AsmOperand;
+}
+
+/// imm1_31 predicate - Immediate in the range [1,31].
+def Imm1_31AsmOperand: ImmAsmOperand { let Name = "Imm1_31"; }
+def imm1_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 32; }]> {
+ let ParserMatchClass = Imm1_31AsmOperand;
+}
+
/// imm0_15 predicate - Immediate in the range [0,15].
def Imm0_15AsmOperand: ImmAsmOperand { let Name = "Imm0_15"; }
def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
@@ -544,6 +584,14 @@ def imm0_32 : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm0_32AsmOperand;
}
+/// imm0_63 predicate - True if the 32-bit immediate is in the range [0,63].
+def Imm0_63AsmOperand: ImmAsmOperand { let Name = "Imm0_63"; }
+def imm0_63 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 64;
+}]> {
+ let ParserMatchClass = Imm0_63AsmOperand;
+}
+
/// imm0_255 predicate - Immediate in the range [0,255].
def Imm0_255AsmOperand : ImmAsmOperand { let Name = "Imm0_255"; }
def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
@@ -812,6 +860,9 @@ def addrmode6dup : Operand<i32>,
let PrintMethod = "printAddrMode6Operand";
let MIOperandInfo = (ops GPR:$addr, i32imm);
let EncoderMethod = "getAddrMode6DupAddressOpValue";
+ // FIXME: This is close, but not quite right. The alignment specifier is
+ // different.
+ let ParserMatchClass = AddrMode6AsmOperand;
}
// addrmodepc := pc + reg
@@ -2753,23 +2804,25 @@ defm STRHT : AI3strT<0b1011, "strht">;
// Load / store multiple Instructions.
//
-multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
+multiclass arm_ldst_mult<string asm, string sfx, bit L_bit, bit P_bit, Format f,
InstrItinClass itin, InstrItinClass itin_upd> {
// IA is the default, so no need for an explicit suffix on the
// mnemonic here. Without it is the canonical spelling.
def IA :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def IA_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2778,16 +2831,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
def DA :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "da${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "da${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b00; // Decrement After
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def DA_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "da${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "da${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b00; // Decrement After
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2796,16 +2851,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
def DB :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "db${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def DB_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "db${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2814,16 +2871,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
def IB :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "ib${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "ib${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b11; // Increment Before
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def IB_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "ib${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "ib${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b11; // Increment Before
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2834,10 +2893,12 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
let neverHasSideEffects = 1 in {
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
-defm LDM : arm_ldst_mult<"ldm", 1, LdStMulFrm, IIC_iLoad_m, IIC_iLoad_mu>;
+defm LDM : arm_ldst_mult<"ldm", "", 1, 0, LdStMulFrm, IIC_iLoad_m,
+ IIC_iLoad_mu>;
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
-defm STM : arm_ldst_mult<"stm", 0, LdStMulFrm, IIC_iStore_m, IIC_iStore_mu>;
+defm STM : arm_ldst_mult<"stm", "", 0, 0, LdStMulFrm, IIC_iStore_m,
+ IIC_iStore_mu>;
} // neverHasSideEffects
@@ -2851,6 +2912,16 @@ def LDMIA_RET : ARMPseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
(LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>,
RegConstraint<"$Rn = $wb">;
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm sysLDM : arm_ldst_mult<"ldm", " ^", 1, 1, LdStMulFrm, IIC_iLoad_m,
+ IIC_iLoad_mu>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm sysSTM : arm_ldst_mult<"stm", " ^", 0, 1, LdStMulFrm, IIC_iStore_m,
+ IIC_iStore_mu>;
+
+
+
//===----------------------------------------------------------------------===//
// Move Instructions.
//
@@ -4999,6 +5070,32 @@ def : MnemonicAlias<"usubaddx", "usax">;
// for isel.
def : ARMInstAlias<"mov${s}${p} $Rd, $imm",
(MVNi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"mvn${s}${p} $Rd, $imm",
+ (MOVi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>;
+// Same for AND <--> BIC
+def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm",
+ (ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"bic${s}${p} $Rdn, $imm",
+ (ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm",
+ (BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"and${s}${p} $Rdn, $imm",
+ (BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+
+// Likewise, "add Rd, so_imm_neg" -> sub
+def : ARMInstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (SUBri GPR:$Rd, GPR:$Rn, so_imm_neg:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"add${s}${p} $Rd, $imm",
+ (SUBri GPR:$Rd, GPR:$Rd, so_imm_neg:$imm, pred:$p, cc_out:$s)>;
+// Same for CMP <--> CMN via so_imm_neg
+def : ARMInstAlias<"cmp${p} $Rd, $imm",
+ (CMNzri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>;
+def : ARMInstAlias<"cmn${p} $Rd, $imm",
+ (CMPri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>;
// The shifter forms of the MOV instruction are aliased to the ASR, LSL,
// LSR, ROR, and RRX instructions.
@@ -5056,4 +5153,8 @@ def : ARMInstAlias<"ror${s}${p} $Rn, $Rm",
// 'mul' instruction can be specified with only two operands.
def : ARMInstAlias<"mul${s}${p} $Rn, $Rm",
- (MUL rGPR:$Rn, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+ (MUL rGPR:$Rn, rGPR:$Rm, rGPR:$Rn, pred:$p, cc_out:$s)>;
+
+// "neg" is and alias for "rsb rd, rn, #0"
+def : ARMInstAlias<"neg${s}${p} $Rd, $Rm",
+ (RSBri GPR:$Rd, GPR:$Rm, 0, pred:$p, cc_out:$s)>;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index f2ca963..c40860d 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -74,9 +74,11 @@ def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
let MIOperandInfo = (ops i32imm);
}
+// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
let Name = "VecListOneD";
let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
let ParserMatchClass = VecListOneDAsmOperand;
@@ -85,6 +87,7 @@ def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
def VecListTwoDAsmOperand : AsmOperandClass {
let Name = "VecListTwoD";
let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
}
def VecListTwoD : RegisterOperand<DPR, "printVectorListTwo"> {
let ParserMatchClass = VecListTwoDAsmOperand;
@@ -93,6 +96,7 @@ def VecListTwoD : RegisterOperand<DPR, "printVectorListTwo"> {
def VecListThreeDAsmOperand : AsmOperandClass {
let Name = "VecListThreeD";
let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
let ParserMatchClass = VecListThreeDAsmOperand;
@@ -101,6 +105,7 @@ def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
def VecListFourDAsmOperand : AsmOperandClass {
let Name = "VecListFourD";
let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
let ParserMatchClass = VecListFourDAsmOperand;
@@ -109,11 +114,92 @@ def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
def VecListTwoQAsmOperand : AsmOperandClass {
let Name = "VecListTwoQ";
let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
}
-def VecListTwoQ : RegisterOperand<DPR, "printVectorListTwo"> {
+def VecListTwoQ : RegisterOperand<DPR, "printVectorListTwoSpaced"> {
let ParserMatchClass = VecListTwoQAsmOperand;
}
+// Register list of one D register, with "all lanes" subscripting.
+def VecListOneDAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
+ let ParserMatchClass = VecListOneDAllLanesAsmOperand;
+}
+// Register list of two D registers, with "all lanes" subscripting.
+def VecListTwoDAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListTwoDAllLanes : RegisterOperand<DPR, "printVectorListTwoAllLanes"> {
+ let ParserMatchClass = VecListTwoDAllLanesAsmOperand;
+}
+
+// Register list of one D register, with byte lane subscripting.
+def VecListOneDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListOneDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListOneDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListOneDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListOneDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListOneDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListOneDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// Register list of two D registers, with byte lane subscripting.
+def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+
//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//
@@ -272,12 +358,23 @@ class VLDQWBregisterPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
(ins addrmode6:$addr, rGPR:$offset), itin,
"$addr.addr = $wb">;
+
class VLDQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset), itin,
"$addr.addr = $wb">;
+class VLDQQWBfixedPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr), itin,
+ "$addr.addr = $wb">;
+class VLDQQWBregisterPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, rGPR:$offset), itin,
+ "$addr.addr = $wb">;
+
+
class VLDQQQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
"$src = $dst">;
@@ -462,31 +559,23 @@ defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">;
def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
// VLD2 : Vector Load (multiple 2-element structures)
-class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy>
+class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
+ InstrItinClass itin>
: NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
- (ins addrmode6:$Rn), IIC_VLD2,
- "vld2", Dt, "$Vd, $Rn", "", []> {
- let Rm = 0b1111;
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
-}
-class VLD2Q<bits<4> op7_4, string Dt, RegisterOperand VdTy>
- : NLdSt<0, 0b10, 0b0011, op7_4,
- (outs VdTy:$Vd),
- (ins addrmode6:$Rn), IIC_VLD2x2,
+ (ins addrmode6:$Rn), itin,
"vld2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDInstruction";
}
-def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8", VecListTwoD>;
-def VLD2d16 : VLD2D<0b1000, {0,1,?,?}, "16", VecListTwoD>;
-def VLD2d32 : VLD2D<0b1000, {1,0,?,?}, "32", VecListTwoD>;
+def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2>;
+def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2>;
+def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2>;
-def VLD2q8 : VLD2Q<{0,0,?,?}, "8", VecListFourD>;
-def VLD2q16 : VLD2Q<{0,1,?,?}, "16", VecListFourD>;
-def VLD2q32 : VLD2Q<{1,0,?,?}, "32", VecListFourD>;
+def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>;
+def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>;
+def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>;
def VLD2d8Pseudo : VLDQPseudo<IIC_VLD2>;
def VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>;
@@ -497,47 +586,56 @@ def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
// ...with address register writeback:
-class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy>
- : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2u,
- "vld2", Dt, "$Vd, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
-}
-class VLD2QWB<bits<4> op7_4, string Dt, RegisterOperand VdTy>
- : NLdSt<0, 0b10, 0b0011, op7_4,
- (outs VdTy:$Vd, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2x2u,
- "vld2", Dt, "$Vd, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
+ RegisterOperand VdTy, InstrItinClass itin> {
+ def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn), itin,
+ "vld2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm), itin,
+ "vld2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
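// For illustration only (assuming the usual ARM NEON post-indexed syntax),
// the _fixed and _register variants defined above match forms like:
//   vld2.8  {d16, d17}, [r0]!        @ fixed stride (Rm encoded as 0b1101)
//   vld2.8  {d16, d17}, [r0], r2     @ register stride (Rm = r2)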
-def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>;
-def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>;
-def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>;
+defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2u>;
+defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2u>;
+defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2u>;
-def VLD2q8_UPD : VLD2QWB<{0,0,?,?}, "8", VecListFourD>;
-def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16", VecListFourD>;
-def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32", VecListFourD>;
+defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>;
+defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>;
+defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>;
-def VLD2d8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
-def VLD2d16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
-def VLD2d32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
+def VLD2d8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>;
+def VLD2d16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>;
+def VLD2d32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>;
+def VLD2d8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>;
+def VLD2d16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>;
+def VLD2d32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>;
-def VLD2q8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
-def VLD2q16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
-def VLD2q32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
+def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
+def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
+def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
+def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
+def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
+def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
// ...with double-spaced registers
-def VLD2b8 : VLD2D<0b1001, {0,0,?,?}, "8", VecListTwoQ>;
-def VLD2b16 : VLD2D<0b1001, {0,1,?,?}, "16", VecListTwoQ>;
-def VLD2b32 : VLD2D<0b1001, {1,0,?,?}, "32", VecListTwoQ>;
-def VLD2b8_UPD : VLD2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>;
-def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>;
-def VLD2b32_UPD : VLD2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>;
+def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2>;
+def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2>;
+def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2>;
+defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2u>;
+defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2u>;
+defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2u>;
// VLD3 : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -997,9 +1095,11 @@ def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
// VLD1DUP : Vector Load (single element to all lanes)
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd), (ins addrmode6dup:$Rn),
- IIC_VLD1dup, "vld1", Dt, "\\{$Vd[]\\}, $Rn", "",
- [(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
+ (ins addrmode6dup:$Rn),
+ IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
+ [(set VecListOneDAllLanes:$Vd,
+ (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
@@ -1025,9 +1125,9 @@ def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
class VLD1QDUP<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2),
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListTwoDAllLanes:$Vd),
(ins addrmode6dup:$Rn), IIC_VLD1dup,
- "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
+ "vld1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
@@ -1038,32 +1138,63 @@ def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">;
def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">;
// ...with address register writeback:
-class VLD1DUPWB<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, GPR:$wb),
- (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
- "vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLD1DupInstruction";
+multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListOneDAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListOneDAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-class VLD1QDUPWB<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
- (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
- "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLD1DupInstruction";
+multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListTwoDAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListTwoDAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
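// For illustration (assuming the usual all-lanes list syntax), the duplicating
// loads with writeback above match forms such as:
//   vld1.8  {d16[]}, [r0]!            @ one D register, fixed stride
//   vld1.8  {d16[], d17[]}, [r0], r2  @ two D registers, register stride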
-def VLD1DUPd8_UPD : VLD1DUPWB<{0,0,0,0}, "8">;
-def VLD1DUPd16_UPD : VLD1DUPWB<{0,1,0,?}, "16">;
-def VLD1DUPd32_UPD : VLD1DUPWB<{1,0,0,?}, "32">;
+defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8">;
+defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">;
+defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">;
-def VLD1DUPq8_UPD : VLD1QDUPWB<{0,0,1,0}, "8">;
-def VLD1DUPq16_UPD : VLD1QDUPWB<{0,1,1,?}, "16">;
-def VLD1DUPq32_UPD : VLD1QDUPWB<{1,0,1,?}, "32">;
+defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">;
+defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">;
+defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">;
-def VLD1DUPq8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
-def VLD1DUPq16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
-def VLD1DUPq32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+def VLD1DUPq8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>;
+def VLD1DUPq16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>;
+def VLD1DUPq32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>;
+def VLD1DUPq8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
+def VLD1DUPq16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
+def VLD1DUPq32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
// VLD2DUP : Vector Load (single 2-element structure to all lanes)
class VLD2DUP<bits<4> op7_4, string Dt>
@@ -1329,94 +1460,109 @@ def VST1q64PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>;
// ...with 3 registers
class VST1D3<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3),
- IIC_VST1x3, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
+ (ins addrmode6:$Rn, VecListThreeD:$Vd),
+ IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVSTInstruction";
}
-class VST1D3WB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm,
- DPR:$Vd, DPR:$src2, DPR:$src3),
- IIC_VST1x3u, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST1D3WB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
+ IIC_VLD1x3u,
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-def VST1d8T : VST1D3<{0,0,0,?}, "8">;
-def VST1d16T : VST1D3<{0,1,0,?}, "16">;
-def VST1d32T : VST1D3<{1,0,0,?}, "32">;
-def VST1d64T : VST1D3<{1,1,0,?}, "64">;
+def VST1d8T : VST1D3<{0,0,0,?}, "8">;
+def VST1d16T : VST1D3<{0,1,0,?}, "16">;
+def VST1d32T : VST1D3<{1,0,0,?}, "32">;
+def VST1d64T : VST1D3<{1,1,0,?}, "64">;
-def VST1d8T_UPD : VST1D3WB<{0,0,0,?}, "8">;
-def VST1d16T_UPD : VST1D3WB<{0,1,0,?}, "16">;
-def VST1d32T_UPD : VST1D3WB<{1,0,0,?}, "32">;
-def VST1d64T_UPD : VST1D3WB<{1,1,0,?}, "64">;
+defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8">;
+defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">;
+defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">;
+defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">;
-def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
-def VST1d64TPseudo_UPD : VSTQQWBPseudo<IIC_VST1x3u>;
+def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
+def VST1d64TPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x3u>;
+def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;
// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0010, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST1x4, "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "",
+ (ins addrmode6:$Rn, VecListFourD:$Vd),
+ IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
[]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVSTInstruction";
}
-class VST1D4WB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm,
- DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST1x4u,
- "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST1D4WB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
+ IIC_VLD1x4u,
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-def VST1d8Q : VST1D4<{0,0,?,?}, "8">;
-def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
-def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
-def VST1d64Q : VST1D4<{1,1,?,?}, "64">;
+def VST1d8Q : VST1D4<{0,0,?,?}, "8">;
+def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
+def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
+def VST1d64Q : VST1D4<{1,1,?,?}, "64">;
-def VST1d8Q_UPD : VST1D4WB<{0,0,?,?}, "8">;
-def VST1d16Q_UPD : VST1D4WB<{0,1,?,?}, "16">;
-def VST1d32Q_UPD : VST1D4WB<{1,0,?,?}, "32">;
-def VST1d64Q_UPD : VST1D4WB<{1,1,?,?}, "64">;
+defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8">;
+defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">;
+defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">;
+defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">;
-def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
-def VST1d64QPseudo_UPD : VSTQQWBPseudo<IIC_VST1x4u>;
+def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
+def VST1d64QPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x4u>;
+def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;
// VST2 : Vector Store (multiple 2-element structures)
-class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, op11_8, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2),
- IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
- let Rm = 0b1111;
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
-}
-class VST2Q<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0011, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST2x2, "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
- "", []> {
+class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
+ InstrItinClass itin>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd),
+ itin, "vst2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVSTInstruction";
}
-def VST2d8 : VST2D<0b1000, {0,0,?,?}, "8">;
-def VST2d16 : VST2D<0b1000, {0,1,?,?}, "16">;
-def VST2d32 : VST2D<0b1000, {1,0,?,?}, "32">;
+def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VST2>;
+def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VST2>;
+def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VST2>;
-def VST2q8 : VST2Q<{0,0,?,?}, "8">;
-def VST2q16 : VST2Q<{0,1,?,?}, "16">;
-def VST2q32 : VST2Q<{1,0,?,?}, "32">;
+def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>;
+def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>;
+def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>;
def VST2d8Pseudo : VSTQPseudo<IIC_VST2>;
def VST2d16Pseudo : VSTQPseudo<IIC_VST2>;
@@ -1427,47 +1573,76 @@ def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
// ...with address register writeback:
-class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
- IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
+ RegisterOperand VdTy> {
+ def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-class VST2QWB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm,
- DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST2x2u,
- "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST2QWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
+ IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-def VST2d8_UPD : VST2DWB<0b1000, {0,0,?,?}, "8">;
-def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16">;
-def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32">;
+defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>;
+defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>;
+defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>;
-def VST2q8_UPD : VST2QWB<{0,0,?,?}, "8">;
-def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">;
-def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">;
+defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">;
+defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">;
+defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">;
-def VST2d8Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
-def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
-def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+def VST2d8PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>;
+def VST2d16PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>;
+def VST2d32PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>;
+def VST2d8PseudoWB_register : VSTQWBPseudo<IIC_VST2u>;
+def VST2d16PseudoWB_register : VSTQWBPseudo<IIC_VST2u>;
+def VST2d32PseudoWB_register : VSTQWBPseudo<IIC_VST2u>;
-def VST2q8Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
-def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
-def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q8PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q16PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q32PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q8PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q16PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q32PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>;
// ...with double-spaced registers
-def VST2b8 : VST2D<0b1001, {0,0,?,?}, "8">;
-def VST2b16 : VST2D<0b1001, {0,1,?,?}, "16">;
-def VST2b32 : VST2D<0b1001, {1,0,?,?}, "32">;
-def VST2b8_UPD : VST2DWB<0b1001, {0,0,?,?}, "8">;
-def VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16">;
-def VST2b32_UPD : VST2DWB<0b1001, {1,0,?,?}, "32">;
+def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VST2>;
+def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VST2>;
+def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VST2>;
+defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>;
+defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>;
+defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>;
// VST3 : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1741,10 +1916,10 @@ def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
- "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
- "$addr.addr = $wb", []> {
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
+ "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVST2LN";
}
@@ -2573,9 +2748,9 @@ class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
// Long shift by immediate.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, op6, op4,
- (outs QPR:$Vd), (ins DPR:$Vm, i32imm:$SIMM), N2RegVShLFrm,
+ (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
[(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm),
(i32 imm:$SIMM))))]>;
@@ -2805,14 +2980,11 @@ multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
v4i32, v4i32, OpNode, Commutable>;
}
-multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> {
- def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
- v4i16, ShOp>;
- def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, !strconcat(Dt,"32"),
- v2i32, ShOp>;
- def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
- v8i16, v4i16, ShOp>;
- def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, !strconcat(Dt,"32"),
+multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
+ def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
+ def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
+ def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
+ def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
v4i32, v2i32, ShOp>;
}
@@ -3477,15 +3649,15 @@ multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
- OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode> {
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
- OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode> {
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
- OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode> {
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
}
@@ -3574,7 +3746,7 @@ def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
v2f32, v2f32, fmul, 1>;
def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
v4f32, v4f32, fmul, 1>;
-defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>;
+defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>;
def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
v2f32, fmul>;
@@ -4285,18 +4457,18 @@ defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;
// VSHLL : Vector Shift Left Long (with maximum shift count)
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
- ValueType OpTy, SDNode OpNode>
+ ValueType OpTy, Operand ImmTy, SDNode OpNode>
: N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
- ResTy, OpTy, OpNode> {
+ ResTy, OpTy, ImmTy, OpNode> {
let Inst{21-16} = op21_16;
let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
- v8i16, v8i8, NEONvshlli>;
+ v8i16, v8i8, imm8, NEONvshlli>;
def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
- v4i32, v4i16, NEONvshlli>;
+ v4i32, v4i16, imm16, NEONvshlli>;
def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
- v2i64, v2i32, NEONvshlli>;
+ v2i64, v2i32, imm32, NEONvshlli>;
// VSHRN : Vector Shift Right and Narrow
defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
@@ -4469,10 +4641,6 @@ def : InstAlias<"vmov${p} $Vd, $Vm",
(VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
def : InstAlias<"vmov${p} $Vd, $Vm",
(VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
-defm : VFPDTAnyNoF64InstAlias<"vmov${p}", "$Vd, $Vm",
- (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
-defm : VFPDTAnyNoF64InstAlias<"vmov${p}", "$Vd, $Vm",
- (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
// VMOV : Vector Move (Immediate)
@@ -4932,34 +5100,34 @@ def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
// VEXT : Vector Extract
-class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
+class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
: N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$Vm, i32imm:$index), NVExtFrm,
+ (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
[(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
- (Ty DPR:$Vm), imm:$index)))]> {
+ (Ty DPR:$Vm), imm:$index)))]> {
bits<4> index;
let Inst{11-8} = index{3-0};
}
-class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
+class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
: N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
- (ins QPR:$Vn, QPR:$Vm, i32imm:$index), NVExtFrm,
+ (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
[(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
- (Ty QPR:$Vm), imm:$index)))]> {
+ (Ty QPR:$Vm), imm:$index)))]> {
bits<4> index;
let Inst{11-8} = index{3-0};
}
-def VEXTd8 : VEXTd<"vext", "8", v8i8> {
+def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
let Inst{11-8} = index{3-0};
}
-def VEXTd16 : VEXTd<"vext", "16", v4i16> {
+def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
let Inst{11-9} = index{2-0};
let Inst{8} = 0b0;
}
-def VEXTd32 : VEXTd<"vext", "32", v2i32> {
+def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
let Inst{11-10} = index{1-0};
let Inst{9-8} = 0b00;
}
@@ -4968,17 +5136,21 @@ def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
(i32 imm:$index))),
(VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
-def VEXTq8 : VEXTq<"vext", "8", v16i8> {
+def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
let Inst{11-8} = index{3-0};
}
-def VEXTq16 : VEXTq<"vext", "16", v8i16> {
+def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
let Inst{11-9} = index{2-0};
let Inst{8} = 0b0;
}
-def VEXTq32 : VEXTq<"vext", "32", v4i32> {
+def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
let Inst{11-10} = index{1-0};
let Inst{9-8} = 0b00;
}
+def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
+ let Inst{11} = index{0};
+ let Inst{10-8} = 0b000;
+}
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
(v4f32 QPR:$Vm),
(i32 imm:$index))),
@@ -5026,17 +5198,17 @@ def VTBL1
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
: N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTB2,
- "vtbl", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "", []>;
+ (ins VecListTwoD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
+ "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL3
: N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), NVTBLFrm, IIC_VTB3,
- "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", "", []>;
+ (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
+ "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL4
: N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm),
+ (ins VecListFourD:$Vn, DPR:$Vm),
NVTBLFrm, IIC_VTB4,
- "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", "", []>;
+ "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1
def VTBL2Pseudo
@@ -5056,18 +5228,18 @@ def VTBX1
let hasExtraSrcRegAllocReq = 1 in {
def VTBX2
: N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
- (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
- "vtbx", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "$orig = $Vd", []>;
+ (ins DPR:$orig, VecListTwoD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
+ "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def VTBX3
: N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
- (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm),
+ (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
NVTBLFrm, IIC_VTBX3,
- "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm",
+ "vtbx", "8", "$Vd, $Vn, $Vm",
"$orig = $Vd", []>;
def VTBX4
- : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), (ins DPR:$orig, DPR:$Vn,
- DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
- "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm",
+ : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
+ "vtbx", "8", "$Vd, $Vn, $Vm",
"$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1
@@ -5207,11 +5379,83 @@ def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
// Assembler aliases
//
-// VAND/VEOR/VORR accept but do not require a type suffix.
+def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
+ (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
+def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
+ (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;
+
+
+// VADD two-operand aliases.
+def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm",
+ (VADDv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm",
+ (VADDv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm",
+ (VADDv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm",
+ (VADDv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm",
+ (VADDv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm",
+ (VADDv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm",
+ (VADDv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm",
+ (VADDv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm",
+ (VADDfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm",
+ (VADDfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
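// For illustration (assuming standard two-operand shorthand), each alias above
// lets the assembler accept the short form and expand it in place, e.g.:
//   vadd.i32  q0, q1    @ becomes  vadd.i32  q0, q0, q1
//   vadd.f32  d0, d1    @ becomes  vadd.f32  d0, d0, d1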
+
+// VSUB two-operand aliases.
+def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm",
+ (VSUBv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm",
+ (VSUBv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm",
+ (VSUBv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm",
+ (VSUBv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm",
+ (VSUBv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm",
+ (VSUBv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm",
+ (VSUBv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm",
+ (VSUBv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm",
+ (VSUBfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm",
+ (VSUBfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VADDW two-operand aliases.
+def : NEONInstAlias<"vaddw${p}.s8 $Vdn, $Vm",
+ (VADDWsv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.s16 $Vdn, $Vm",
+ (VADDWsv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.s32 $Vdn, $Vm",
+ (VADDWsv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.u8 $Vdn, $Vm",
+ (VADDWuv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.u16 $Vdn, $Vm",
+ (VADDWuv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.u32 $Vdn, $Vm",
+ (VADDWuv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
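// For illustration (assuming the same shorthand convention), a widening add
// such as "vaddw.s8 q0, d1" would expand to "vaddw.s8 q0, q0, d1".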
+
+// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
(VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
(VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
+ (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
+ (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
(VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
@@ -5220,245 +5464,450 @@ defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
(VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
(VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
-
-// VLD1 requires a size suffix, but also accepts type specific variants.
-// Load one D register.
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d8 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d16 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d32 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d64 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>;
-// with writeback, fixed stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d8wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d16wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d32wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d64wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-// with writeback, register stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d8wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d16wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d32wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d64wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-
-// Load two D registers.
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1q8 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1q16 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1q32 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1q64 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>;
-// with writeback, fixed stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1q8wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1q16wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1q32wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1q64wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-// with writeback, register stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1q8wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1q16wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1q32wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1q64wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-
-// Load three D registers.
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d8T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d16T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d32T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d64T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>;
-// with writeback, fixed stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d8Twb_fixed VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d16Twb_fixed VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d32Twb_fixed VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d64Twb_fixed VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-// with writeback, register stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d8Twb_register VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d16Twb_register VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d32Twb_register VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d64Twb_register VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-
-
-// Load four D registers.
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d8Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d16Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d32Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d64Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>;
-// with writeback, fixed stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d8Qwb_fixed VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d16Qwb_fixed VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d32Qwb_fixed VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d64Qwb_fixed VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-// with writeback, register stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d8Qwb_register VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d16Qwb_register VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d32Qwb_register VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d64Qwb_register VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-
-// VST1 requires a size suffix, but also accepts type specific variants.
-// Store one D register.
-defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1d8 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1d16 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1d32 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1d64 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-// with writeback, fixed stride
-defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1d8wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1d16wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1d32wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1d64wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-// with writeback, register stride
-defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1d8wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm,
- VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1d16wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm,
- VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1d32wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm,
- VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1d64wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm,
- VecListOneD:$Vd, pred:$p)>;
-
-// Store two D registers.
-defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1q8 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1q16 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1q32 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1q64 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-// with writeback, fixed stride
-defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1q8wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1q16wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1q32wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1q64wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-// with writeback, register stride
-defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1q8wb_register zero_reg, addrmode6:$Rn,
- rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1q16wb_register zero_reg, addrmode6:$Rn,
- rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1q32wb_register zero_reg, addrmode6:$Rn,
- rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1q64wb_register zero_reg, addrmode6:$Rn,
- rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>;
-
-// FIXME: The three and four register VST1 instructions haven't been moved
-// to the VecList* encoding yet, so we can't do assembly parsing support
-// for them. Uncomment these when that happens.
-// Load three D registers.
-//defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d8T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>;
-//defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d16T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>;
-//defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d32T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>;
-//defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d64T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>;
-
-// Load four D registers.
-//defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d8Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>;
-//defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d16Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>;
-//defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d32Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>;
-//defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d64Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>;
-
-
-// VTRN instructions data type suffix aliases for more-specific types.
-defm : VFPDT8ReqInstAlias <"vtrn${p}", "$Dd, $Dm",
- (VTRNd8 DPR:$Dd, DPR:$Dm, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vtrn${p}", "$Dd, $Dm",
- (VTRNd16 DPR:$Dd, DPR:$Dm, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vtrn${p}", "$Dd, $Dm",
- (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
-
-defm : VFPDT8ReqInstAlias <"vtrn${p}", "$Qd, $Qm",
- (VTRNq8 QPR:$Qd, QPR:$Qm, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vtrn${p}", "$Qd, $Qm",
- (VTRNq16 QPR:$Qd, QPR:$Qm, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vtrn${p}", "$Qd, $Qm",
- (VTRNq32 QPR:$Qd, QPR:$Qm, pred:$p)>;
+// ... two-operand aliases
+def : NEONInstAlias<"vand${p} $Vdn, $Vm",
+ (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vand${p} $Vdn, $Vm",
+ (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vbic${p} $Vdn, $Vm",
+ (VBICd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vbic${p} $Vdn, $Vm",
+ (VBICq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"veor${p} $Vdn, $Vm",
+ (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"veor${p} $Vdn, $Vm",
+ (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vorr${p} $Vdn, $Vm",
+ (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vorr${p} $Vdn, $Vm",
+ (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+defm : VFPDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
+ (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
+ (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
+ (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
+ (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
+ (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
+ (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VMUL two-operand aliases.
+def : NEONInstAlias<"vmul${p}.p8 $Qdn, $Qm",
+ (VMULpq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i8 $Qdn, $Qm",
+ (VMULv16i8 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Qm",
+ (VMULv8i16 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Qm",
+ (VMULv4i32 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.p8 $Ddn, $Dm",
+ (VMULpd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i8 $Ddn, $Dm",
+ (VMULv8i8 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm",
+ (VMULv4i16 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm",
+ (VMULv2i32 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Qm",
+ (VMULfq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm",
+ (VMULfd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm$lane",
+ (VMULslv4i16 DPR:$Ddn, DPR:$Ddn, DPR_8:$Dm,
+ VectorIndex16:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Dm$lane",
+ (VMULslv8i16 QPR:$Qdn, QPR:$Qdn, DPR_8:$Dm,
+ VectorIndex16:$lane, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm$lane",
+ (VMULslv2i32 DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Dm$lane",
+ (VMULslv4i32 QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm$lane",
+ (VMULslfd DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Dm$lane",
+ (VMULslfq QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+
+// VQADD (register) two-operand aliases.
+def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm",
+ (VQADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm",
+ (VQADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm",
+ (VQADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm",
+ (VQADDsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm",
+ (VQADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm",
+ (VQADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm",
+ (VQADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm",
+ (VQADDuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm",
+ (VQADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm",
+ (VQADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm",
+ (VQADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm",
+ (VQADDsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm",
+ (VQADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm",
+ (VQADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm",
+ (VQADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm",
+ (VQADDuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VSHL (immediate) two-operand aliases.
+def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm",
+ (VSHLiv8i8 DPR:$Vdn, DPR:$Vdn, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm",
+ (VSHLiv4i16 DPR:$Vdn, DPR:$Vdn, imm0_15:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm",
+ (VSHLiv2i32 DPR:$Vdn, DPR:$Vdn, imm0_31:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm",
+ (VSHLiv1i64 DPR:$Vdn, DPR:$Vdn, imm0_63:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm",
+ (VSHLiv16i8 QPR:$Vdn, QPR:$Vdn, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm",
+ (VSHLiv8i16 QPR:$Vdn, QPR:$Vdn, imm0_15:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm",
+ (VSHLiv4i32 QPR:$Vdn, QPR:$Vdn, imm0_31:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm",
+ (VSHLiv2i64 QPR:$Vdn, QPR:$Vdn, imm0_63:$imm, pred:$p)>;
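// For illustration (assuming standard immediate-shift syntax), e.g.
// "vshl.i16 d0, #3" expands to "vshl.i16 d0, d0, #3", and
// "vshl.i32 q1, #8" expands to "vshl.i32 q1, q1, #8".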
+
+// VSHL (register) two-operand aliases.
+def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm",
+ (VSHLsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm",
+ (VSHLsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm",
+ (VSHLsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm",
+ (VSHLsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm",
+ (VSHLuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm",
+ (VSHLuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm",
+ (VSHLuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm",
+ (VSHLuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm",
+ (VSHLsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm",
+ (VSHLsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm",
+ (VSHLsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm",
+ (VSHLsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm",
+ (VSHLuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm",
+ (VSHLuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm",
+ (VSHLuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm",
+ (VSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VSHR (immediate) two-operand aliases.
+def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm",
+ (VSHRsv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm",
+ (VSHRsv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm",
+ (VSHRsv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm",
+ (VSHRsv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm",
+ (VSHRsv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm",
+ (VSHRsv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm",
+ (VSHRsv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm",
+ (VSHRsv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm",
+ (VSHRuv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm",
+ (VSHRuv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm",
+ (VSHRuv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm",
+ (VSHRuv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm",
+ (VSHRuv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm",
+ (VSHRuv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm",
+ (VSHRuv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm",
+ (VSHRuv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+// VLD1 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+defm VLD1LNdAsm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD1LNdAsm : NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD1LNdAsm : NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+defm VLD1LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr!",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD1LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr!",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD1LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr!",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD1LNdWB_register_Asm :
+ NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VLD1LNdWB_register_Asm :
+ NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VLD1LNdWB_register_Asm :
+ NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
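// For illustration (assuming the usual lane-indexed syntax), these pseudos let
// the parser accept forms such as:
//   vld1.8   {d16[3]}, [r0]
//   vld1.16  {d16[2]}, [r0]!
//   vld1.32  {d16[1]}, [r0], r2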
+
+
+// VST1 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+defm VST1LNdAsm : NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST1LNdAsm : NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST1LNdAsm : NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+defm VST1LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr!",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST1LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr!",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST1LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr!",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST1LNdWB_register_Asm :
+ NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VST1LNdWB_register_Asm :
+ NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VST1LNdWB_register_Asm :
+ NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VLD2 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+defm VLD2LNdAsm : NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD2LNdAsm : NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD2LNdAsm : NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+defm VLD2LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr!",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD2LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr!",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD2LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr!",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD2LNdWB_register_Asm :
+ NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VLD2LNdWB_register_Asm :
+ NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VLD2LNdWB_register_Asm :
+ NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VST2 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+defm VST2LNdAsm : NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST2LNdAsm : NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST2LNdAsm : NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+defm VST2LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr!",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST2LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr!",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST2LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr!",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST2LNdWB_register_Asm :
+ NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VST2LNdWB_register_Asm :
+ NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VST2LNdWB_register_Asm :
+ NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VMOV takes an optional datatype suffix
+defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
+ (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
+ (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
+
+// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
+// D-register versions.
+def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
+ (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
+ (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
+ (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
+ (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
+ (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
+ (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
+ (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+// Q-register versions.
+def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
+ (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
+ (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
+ (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
+ (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
+ (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
+ (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
+ (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+
+// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
+// D-register versions.
+def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
+ (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
+ (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
+ (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
+ (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
+ (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
+ (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
+ (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+// Q-register versions.
+def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
+ (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
+ (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
+ (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
+ (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
+ (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
+ (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
+ (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+
+// Two-operand variants for VEXT
+def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm",
+ (VEXTd8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm",
+ (VEXTd16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_3:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm",
+ (VEXTd32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_1:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm",
+ (VEXTq8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_15:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm",
+ (VEXTq16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm",
+ (VEXTq32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_3:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.64 $Vdn, $Vm, $imm",
+ (VEXTq64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_1:$imm, pred:$p)>;
+
+// Two-operand variants for VQDMULH
+def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm",
+ (VQDMULHv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm",
+ (VQDMULHv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm",
+ (VQDMULHv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm",
+ (VQDMULHv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
+// these should restrict to just the Q register variants, but the register
+// classes are enough to match correctly regardless, so we keep it simple
+// and just use MnemonicAlias.
+def : NEONMnemonicAlias<"vbicq", "vbic">;
+def : NEONMnemonicAlias<"vandq", "vand">;
+def : NEONMnemonicAlias<"veorq", "veor">;
+def : NEONMnemonicAlias<"vorrq", "vorr">;
+
+def : NEONMnemonicAlias<"vmovq", "vmov">;
+def : NEONMnemonicAlias<"vmvnq", "vmvn">;
+// Explicit versions for floating point so that the FPImm variants get
+// handled early. The parser gets confused otherwise.
+def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
+def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;
+
+def : NEONMnemonicAlias<"vaddq", "vadd">;
+def : NEONMnemonicAlias<"vsubq", "vsub">;
+
+def : NEONMnemonicAlias<"vminq", "vmin">;
+def : NEONMnemonicAlias<"vmaxq", "vmax">;
+
+def : NEONMnemonicAlias<"vmulq", "vmul">;
+
+def : NEONMnemonicAlias<"vabsq", "vabs">;
+
+def : NEONMnemonicAlias<"vshlq", "vshl">;
+def : NEONMnemonicAlias<"vshrq", "vshr">;
+
+def : NEONMnemonicAlias<"vcvtq", "vcvt">;
+
+def : NEONMnemonicAlias<"vcleq", "vcle">;
+def : NEONMnemonicAlias<"vceqq", "vceq">;
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index c6cc98d..ac1a229 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -1131,9 +1131,6 @@ def tRSB : // A8.6.141
"rsb", "\t$Rd, $Rn, #0",
[(set tGPR:$Rd, (ineg tGPR:$Rn))]>;
-def : tInstAlias<"neg${s}${p} $Rd, $Rm",
- (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>;
-
// Subtract with carry register
let Uses = [CPSR] in
def tSBC : // A8.6.151
@@ -1435,3 +1432,8 @@ def : InstAlias<"nop", (tMOVr R8, R8, 14, 0)>,Requires<[IsThumb, IsThumb1Only]>;
// nothing).
def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
+
+// "neg" is and alias for "rsb rd, rn, #0"
+def : tInstAlias<"neg${s}${p} $Rd, $Rm",
+ (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>;
+
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 6129fa3..981592c 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -80,18 +80,19 @@ def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{
// only used on aliases (Pat<> and InstAlias<>). The actual encoding
// is handled by the destination instructions, which use t2_so_imm.
def t2_so_imm_not_asmoperand : AsmOperandClass { let Name = "T2SOImmNot"; }
-def t2_so_imm_not : Operand<i32>,
- PatLeaf<(imm), [{
+def t2_so_imm_not : Operand<i32>, PatLeaf<(imm), [{
return ARM_AM::getT2SOImmVal(~((uint32_t)N->getZExtValue())) != -1;
}], t2_so_imm_not_XFORM> {
let ParserMatchClass = t2_so_imm_not_asmoperand;
}
// t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm.
-def t2_so_imm_neg : Operand<i32>,
- PatLeaf<(imm), [{
+def t2_so_imm_neg_asmoperand : AsmOperandClass { let Name = "T2SOImmNeg"; }
+def t2_so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1;
-}], t2_so_imm_neg_XFORM>;
+}], t2_so_imm_neg_XFORM> {
+ let ParserMatchClass = t2_so_imm_neg_asmoperand;
+}
/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095].
def imm0_4095 : Operand<i32>,
@@ -1333,7 +1334,7 @@ def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
let mayStore = 1, neverHasSideEffects = 1 in {
def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb),
- (ins rGPR:$Rt, t2addrmode_imm8:$addr),
+ (ins GPRnopc:$Rt, t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
"str", "\t$Rt, $addr!",
"$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
@@ -1357,13 +1358,13 @@ def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb),
} // mayStore = 1, neverHasSideEffects = 1
def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb),
- (ins rGPR:$Rt, addr_offset_none:$Rn,
+ (ins GPRnopc:$Rt, addr_offset_none:$Rn,
t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iStore_iu,
"str", "\t$Rt, $Rn$offset",
"$Rn = $Rn_wb,@earlyclobber $Rn_wb",
[(set GPRnopc:$Rn_wb,
- (post_store rGPR:$Rt, addr_offset_none:$Rn,
+ (post_store GPRnopc:$Rt, addr_offset_none:$Rn,
t2am_imm8_offset:$offset))]>;
def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb),
@@ -3971,6 +3972,18 @@ def : t2InstAlias<"push${p} $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>;
def : t2InstAlias<"pop${p}.w $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>;
def : t2InstAlias<"pop${p} $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>;
+// STMIA/STMIA_UPD aliases w/o the optional .w suffix
+def : t2InstAlias<"stm${p} $Rn, $regs",
+ (t2STMIA GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"stm${p} $Rn!, $regs",
+ (t2STMIA_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
+// LDMIA/LDMIA_UPD aliases w/o the optional .w suffix
+def : t2InstAlias<"ldm${p} $Rn, $regs",
+ (t2LDMIA GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"ldm${p} $Rn!, $regs",
+ (t2LDMIA_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
// STMDB/STMDB_UPD aliases w/ the optional .w suffix
def : t2InstAlias<"stmdb${p}.w $Rn, $regs",
(t2STMDB GPR:$Rn, pred:$p, reglist:$regs)>;
@@ -4084,8 +4097,50 @@ def : t2InstAlias<"sxth${p} $Rd, $Rm$rot",
// for isel.
def : t2InstAlias<"mov${p} $Rd, $imm",
(t2MVNi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
+def : t2InstAlias<"mvn${p} $Rd, $imm",
+ (t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
+// Same for AND <--> BIC
+def : t2InstAlias<"bic${s}${p} $Rd, $Rn, $imm",
+ (t2ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"bic${s}${p} $Rdn, $imm",
+ (t2ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"and${s}${p} $Rd, $Rn, $imm",
+ (t2BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"and${s}${p} $Rdn, $imm",
+ (t2BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+// Likewise, "add Rd, t2_so_imm_neg" -> sub
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${s}${p} $Rd, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rd, t2_so_imm_neg:$imm,
+ pred:$p, cc_out:$s)>;
+// Same for CMP <--> CMN via t2_so_imm_neg
+def : t2InstAlias<"cmp${p} $Rd, $imm",
+ (t2CMNzri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
+def : t2InstAlias<"cmn${p} $Rd, $imm",
+ (t2CMPri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
// Wide 'mul' encoding can be specified with only two operands.
def : t2InstAlias<"mul${p} $Rn, $Rm",
- (t2MUL rGPR:$Rn, rGPR:$Rn, rGPR:$Rm, pred:$p)>;
+ (t2MUL rGPR:$Rn, rGPR:$Rm, rGPR:$Rn, pred:$p)>;
+
+// "neg" is and alias for "rsb rd, rn, #0"
+def : t2InstAlias<"neg${s}${p} $Rd, $Rm",
+ (t2RSBri rGPR:$Rd, rGPR:$Rm, 0, pred:$p, cc_out:$s)>;
+
+// MOV so_reg assembler pseudos. InstAlias isn't expressive enough for
+// these, unfortunately.
+def t2MOVsi: t2AsmPseudo<"mov${p} $Rd, $shift",
+ (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>;
+def t2MOVSsi: t2AsmPseudo<"movs${p} $Rd, $shift",
+ (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>;
+
+// ADR w/o the .w suffix
+def : t2InstAlias<"adr${p} $Rd, $addr",
+ (t2ADR rGPR:$Rd, t2adrlabel:$addr, pred:$p)>;
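
Several of the aliases above (mvn/mov, and/bic, add/sub, cmp/cmn) accept an immediate that is only encodable after bitwise inversion or negation and silently switch to the complementary instruction. A rough standalone sketch of the cmp/cmn case; isT2SOImm() below is a deliberately simplified stand-in for the real ARM_AM::getT2SOImmVal() test:

    // Sketch of the cmp <-> cmn immediate trick: if #imm is not encodable but
    // #-imm is, emit "cmn" with the negated value instead of "cmp".
    #include <cstdint>
    #include <cstdio>
    #include <string>

    static bool isT2SOImm(int64_t Imm) {
      return Imm >= 0 && Imm <= 0xff; // placeholder, not the real modified-immediate rule
    }

    static std::string selectCompare(int64_t Imm, int64_t &Encoded) {
      if (isT2SOImm(Imm))  { Encoded = Imm;  return "cmp"; }
      if (isT2SOImm(-Imm)) { Encoded = -Imm; return "cmn"; }
      Encoded = Imm;
      return "<unencodable>";
    }

    int main() {
      int64_t Enc;
      std::string Op = selectCompare(-3, Enc); // "cmp r0, #-3" -> "cmn r0, #3"
      std::printf("%s r0, #%lld\n", Op.c_str(), static_cast<long long>(Enc));
    }
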
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index e420135..5d43556 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -1160,18 +1160,64 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm),
//===----------------------------------------------------------------------===//
// Assembler aliases.
//
+// A few mnemonic aliases for pre-unified syntax. We don't guarantee to
+// support them all, but supporting at least some of the basics is
+// good to be friendly.
+def : VFP2MnemonicAlias<"flds", "vldr">;
+def : VFP2MnemonicAlias<"fldd", "vldr">;
+def : VFP2MnemonicAlias<"fmrs", "vmov">;
+def : VFP2MnemonicAlias<"fmsr", "vmov">;
+def : VFP2MnemonicAlias<"fsqrts", "vsqrt">;
+def : VFP2MnemonicAlias<"fsqrtd", "vsqrt">;
+def : VFP2MnemonicAlias<"fadds", "vadd.f32">;
+def : VFP2MnemonicAlias<"faddd", "vadd.f64">;
+def : VFP2MnemonicAlias<"fmrdd", "vmov">;
+def : VFP2MnemonicAlias<"fmrds", "vmov">;
+def : VFP2MnemonicAlias<"fmrrd", "vmov">;
+def : VFP2MnemonicAlias<"fmdrr", "vmov">;
+def : VFP2MnemonicAlias<"fmuld", "vmul.f64">;
+def : VFP2MnemonicAlias<"fnegs", "vneg.f32">;
+def : VFP2MnemonicAlias<"fnegd", "vneg.f64">;
+def : VFP2MnemonicAlias<"ftosizd", "vcvt.s32.f64">;
+def : VFP2MnemonicAlias<"ftosid", "vcvtr.s32.f64">;
+def : VFP2MnemonicAlias<"ftosizs", "vcvt.s32.f32">;
+def : VFP2MnemonicAlias<"ftosis", "vcvtr.s32.f32">;
+def : VFP2MnemonicAlias<"ftouizd", "vcvt.u32.f64">;
+def : VFP2MnemonicAlias<"ftouid", "vcvtr.u32.f64">;
+def : VFP2MnemonicAlias<"ftouizs", "vcvt.u32.f32">;
+def : VFP2MnemonicAlias<"ftouis", "vcvtr.u32.f32">;
+def : VFP2MnemonicAlias<"fsitod", "vcvt.f64.s32">;
+def : VFP2MnemonicAlias<"fsitos", "vcvt.f32.s32">;
+def : VFP2MnemonicAlias<"fuitod", "vcvt.f64.u32">;
+def : VFP2MnemonicAlias<"fuitos", "vcvt.f32.u32">;
+def : VFP2MnemonicAlias<"fsts", "vstr">;
+def : VFP2MnemonicAlias<"fstd", "vstr">;
+def : VFP2MnemonicAlias<"fmacd", "vmla.f64">;
+def : VFP2MnemonicAlias<"fmacs", "vmla.f32">;
def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>;
+def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm",
+ (VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
+def : VFP2InstAlias<"faddd${p} $Dd, $Dn, $Dm",
+ (VADDD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2InstAlias<"fsubs${p} $Sd, $Sn, $Sm",
+ (VSUBS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
+def : VFP2InstAlias<"fsubd${p} $Dd, $Dn, $Dm",
+ (VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
+
+// No need for the size suffix on VSQRT. It's implied by the register classes.
+def : VFP2InstAlias<"vsqrt${p} $Sd, $Sm", (VSQRTS SPR:$Sd, SPR:$Sm, pred:$p)>;
+def : VFP2InstAlias<"vsqrt${p} $Dd, $Dm", (VSQRTD DPR:$Dd, DPR:$Dm, pred:$p)>;
// VLDR/VSTR accept an optional type suffix.
-defm : VFPDT32InstAlias<"vldr${p}", "$Sd, $addr",
- (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
-defm : VFPDT32InstAlias<"vstr${p}", "$Sd, $addr",
- (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
-defm : VFPDT64InstAlias<"vldr${p}", "$Dd, $addr",
- (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
-defm : VFPDT64InstAlias<"vstr${p}", "$Dd, $addr",
- (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vldr${p}.32 $Sd, $addr",
+ (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vstr${p}.32 $Sd, $addr",
+ (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vldr${p}.64 $Dd, $addr",
+ (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vstr${p}.64 $Dd, $addr",
+ (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
// VMUL has a two-operand form (implied destination operand)
def : VFP2InstAlias<"vmul${p}.f64 $Dn, $Dm",
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index c8728f4..6712fb6 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -33,6 +33,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -1471,19 +1472,18 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
while (++I != E) {
if (I->isDebugValue() || MemOps.count(&*I))
continue;
- const MCInstrDesc &MCID = I->getDesc();
- if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects())
+ if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
return false;
- if (isLd && MCID.mayStore())
+ if (isLd && I->mayStore())
return false;
if (!isLd) {
- if (MCID.mayLoad())
+ if (I->mayLoad())
return false;
// It's not safe to move the first 'str' down.
// str r1, [r0]
// strh r5, [r0]
// str r4, [r0, #+4]
- if (MCID.mayStore())
+ if (I->mayStore())
return false;
}
for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
@@ -1773,8 +1773,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
while (MBBI != E) {
for (; MBBI != E; ++MBBI) {
MachineInstr *MI = MBBI;
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.isCall() || MCID.isTerminator()) {
+ if (MI->isCall() || MI->isTerminator()) {
// Stop at barriers.
++MBBI;
break;
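
The rewritten checks above query the MachineInstr itself (isCall, isTerminator, mayLoad, mayStore) instead of going through its MCInstrDesc; the safety rule is unchanged. A standalone sketch of that rule with a stand-in instruction type, not llvm::MachineInstr:

    // Moving a load across a range of instructions is only considered safe if
    // none of them is a call, a terminator, a store, or has unmodeled side effects.
    #include <cassert>
    #include <vector>

    struct Instr {
      bool IsCall = false, IsTerminator = false;
      bool MayLoad = false, MayStore = false, HasSideEffects = false;
    };

    static bool isSafeToMoveLoadAcross(const std::vector<Instr> &Range) {
      for (const Instr &I : Range) {
        if (I.IsCall || I.IsTerminator || I.HasSideEffects)
          return false;
        if (I.MayStore) // a store might alias the load being moved
          return false;
      }
      return true;
    }

    int main() {
      Instr Alu;
      Instr Load;  Load.MayLoad = true;
      Instr Store; Store.MayStore = true;
      assert(isSafeToMoveLoadAcross({Alu, Load}));
      assert(!isSafeToMoveLoadAcross({Alu, Store}));
    }
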
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 6cbb24b..61b75cb 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -38,22 +38,25 @@ extern "C" void LLVMInitializeARMTarget() {
///
ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS),
JITInfo(),
InstrItins(Subtarget.getInstrItineraryData()) {
// Default to soft float ABI
- if (FloatABIType == FloatABI::Default)
- FloatABIType = FloatABI::Soft;
+ if (Options.FloatABIType == FloatABI::Default)
+ this->Options.FloatABIType = FloatABI::Soft;
}
ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM, OL), InstrInfo(Subtarget),
+ : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ InstrInfo(Subtarget),
DataLayout(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:64-i64:32:64-"
"v128:32:128-v64:32:64-n32-S32") :
@@ -73,9 +76,10 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM, OL),
+ : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
InstrInfo(Subtarget.hasThumb2()
? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
: ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
@@ -143,10 +147,16 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM) {
}
bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM) {
- if (Subtarget.isThumb2() && !Subtarget.prefers32BitThumb())
- PM.add(createThumb2SizeReductionPass());
+ if (Subtarget.isThumb2()) {
+ if (!Subtarget.prefers32BitThumb())
+ PM.add(createThumb2SizeReductionPass());
+
+ // The constant island pass works on unbundled instructions.
+ PM.add(createUnpackMachineBundlesPass());
+ }
PM.add(createARMConstantIslandPass());
+
return true;
}
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index a1f517b..cd77822 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -41,6 +41,7 @@ private:
public:
ARMBaseTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
@@ -71,6 +72,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
public:
ARMTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
@@ -112,6 +114,7 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
public:
ThumbTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 19defa1..721a225 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -36,6 +36,7 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
ELF::SHF_WRITE |
ELF::SHF_ALLOC,
SectionKind::getDataRel());
+ StructorOutputOrder = Structors::PriorityOrder;
LSDASection = NULL;
}
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index bb83e5e..cd86065 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -39,10 +39,15 @@ namespace {
class ARMOperand;
+enum VectorLaneTy { NoLanes, AllLanes, IndexedLane };
+
class ARMAsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
MCAsmParser &Parser;
+ // Map of register aliases registered via the .req directive.
+ StringMap<unsigned> RegisterReqs;
+
struct {
ARMCC::CondCodes Cond; // Condition for IT block.
unsigned Mask:4; // Condition mask for instructions.
@@ -90,9 +95,12 @@ class ARMAsmParser : public MCTargetAsmParser {
unsigned &ShiftAmount);
bool parseDirectiveWord(unsigned Size, SMLoc L);
bool parseDirectiveThumb(SMLoc L);
+ bool parseDirectiveARM(SMLoc L);
bool parseDirectiveThumbFunc(SMLoc L);
bool parseDirectiveCode(SMLoc L);
bool parseDirectiveSyntax(SMLoc L);
+ bool parseDirectiveReq(StringRef Name, SMLoc L);
+ bool parseDirectiveUnreq(SMLoc L);
StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode,
bool &CarrySetting, unsigned &ProcessorIMod,
@@ -161,6 +169,7 @@ class ARMAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseVectorList(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index);
// Asm Match Converter Methods
bool cvtT2LdrdPre(MCInst &Inst, unsigned Opcode,
@@ -271,6 +280,8 @@ class ARMOperand : public MCParsedAsmOperand {
k_DPRRegisterList,
k_SPRRegisterList,
k_VectorList,
+ k_VectorListAllLanes,
+ k_VectorListIndexed,
k_ShiftedRegister,
k_ShiftedImmediate,
k_ShifterImmediate,
@@ -324,6 +335,8 @@ class ARMOperand : public MCParsedAsmOperand {
struct {
unsigned RegNum;
unsigned Count;
+ unsigned LaneIndex;
+ bool isDoubleSpaced;
} VectorList;
struct {
@@ -409,6 +422,8 @@ public:
Registers = o.Registers;
break;
case k_VectorList:
+ case k_VectorListAllLanes:
+ case k_VectorListIndexed:
VectorList = o.VectorList;
break;
case k_CoprocNum:
@@ -562,6 +577,22 @@ public:
int64_t Value = CE->getValue();
return Value >= 0 && Value < 256;
}
+ bool isImm0_1() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 2;
+ }
+ bool isImm0_3() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 4;
+ }
bool isImm0_7() const {
if (Kind != k_Immediate)
return false;
@@ -586,6 +617,94 @@ public:
int64_t Value = CE->getValue();
return Value >= 0 && Value < 32;
}
+ bool isImm0_63() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 64;
+ }
+ bool isImm8() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value == 8;
+ }
+ bool isImm16() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value == 16;
+ }
+ bool isImm32() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value == 32;
+ }
+ bool isShrImm8() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 8;
+ }
+ bool isShrImm16() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 16;
+ }
+ bool isShrImm32() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 32;
+ }
+ bool isShrImm64() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 64;
+ }
+ bool isImm1_7() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 8;
+ }
+ bool isImm1_15() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 16;
+ }
+ bool isImm1_31() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 32;
+ }
bool isImm1_16() const {
if (Kind != k_Immediate)
return false;
@@ -676,6 +795,14 @@ public:
int64_t Value = CE->getValue();
return ARM_AM::getSOImmVal(~Value) != -1;
}
+ bool isARMSOImmNeg() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getSOImmVal(-Value) != -1;
+ }
bool isT2SOImm() const {
if (Kind != k_Immediate)
return false;
@@ -692,6 +819,14 @@ public:
int64_t Value = CE->getValue();
return ARM_AM::getT2SOImmVal(~Value) != -1;
}
+ bool isT2SOImmNeg() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getT2SOImmVal(-Value) != -1;
+ }
bool isSetEndImm() const {
if (Kind != k_Immediate)
return false;
@@ -892,9 +1027,9 @@ public:
if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Immediate offset in range [-255, -1].
- if (!Memory.OffsetImm) return true;
+ if (!Memory.OffsetImm) return false;
int64_t Val = Memory.OffsetImm->getValue();
- return Val > -256 && Val < 0;
+ return (Val == INT32_MIN) || (Val > -256 && Val < 0);
}
bool isMemUImm12Offset() const {
if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
@@ -940,31 +1075,75 @@ public:
bool isProcIFlags() const { return Kind == k_ProcIFlags; }
// NEON operands.
+ bool isSingleSpacedVectorList() const {
+ return Kind == k_VectorList && !VectorList.isDoubleSpaced;
+ }
+ bool isDoubleSpacedVectorList() const {
+ return Kind == k_VectorList && VectorList.isDoubleSpaced;
+ }
bool isVecListOneD() const {
- if (Kind != k_VectorList) return false;
+ if (!isSingleSpacedVectorList()) return false;
return VectorList.Count == 1;
}
bool isVecListTwoD() const {
- if (Kind != k_VectorList) return false;
+ if (!isSingleSpacedVectorList()) return false;
return VectorList.Count == 2;
}
bool isVecListThreeD() const {
- if (Kind != k_VectorList) return false;
+ if (!isSingleSpacedVectorList()) return false;
return VectorList.Count == 3;
}
bool isVecListFourD() const {
- if (Kind != k_VectorList) return false;
+ if (!isSingleSpacedVectorList()) return false;
return VectorList.Count == 4;
}
bool isVecListTwoQ() const {
- if (Kind != k_VectorList) return false;
- //FIXME: We haven't taught the parser to handle by-two register lists
- // yet, so don't pretend to know one.
- return VectorList.Count == 2 && false;
+ if (!isDoubleSpacedVectorList()) return false;
+ return VectorList.Count == 2;
+ }
+
+ bool isVecListOneDAllLanes() const {
+ if (Kind != k_VectorListAllLanes) return false;
+ return VectorList.Count == 1;
+ }
+
+ bool isVecListTwoDAllLanes() const {
+ if (Kind != k_VectorListAllLanes) return false;
+ return VectorList.Count == 2;
+ }
+
+ bool isVecListOneDByteIndexed() const {
+ if (Kind != k_VectorListIndexed) return false;
+ return VectorList.Count == 1 && VectorList.LaneIndex <= 7;
+ }
+
+ bool isVecListOneDHWordIndexed() const {
+ if (Kind != k_VectorListIndexed) return false;
+ return VectorList.Count == 1 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListOneDWordIndexed() const {
+ if (Kind != k_VectorListIndexed) return false;
+ return VectorList.Count == 1 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListTwoDByteIndexed() const {
+ if (Kind != k_VectorListIndexed) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 7;
+ }
+
+ bool isVecListTwoDHWordIndexed() const {
+ if (Kind != k_VectorListIndexed) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListTwoDWordIndexed() const {
+ if (Kind != k_VectorListIndexed) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 1;
}
bool isVectorIndex8() const {
@@ -1233,6 +1412,14 @@ public:
Inst.addOperand(MCOperand::CreateImm(~CE->getValue()));
}
+ void addT2SOImmNegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The operand is actually a t2_so_imm, but we have its
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(-CE->getValue()));
+ }
+
void addARMSOImmNotOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// The operand is actually a so_imm, but we have its bitwise
@@ -1241,6 +1428,14 @@ public:
Inst.addOperand(MCOperand::CreateImm(~CE->getValue()));
}
+ void addARMSOImmNegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The operand is actually a so_imm, but we have its
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(-CE->getValue()));
+ }
+
void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt())));
@@ -1527,37 +1722,15 @@ public:
Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags())));
}
- void addVecListOneDOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
- }
-
- void addVecListTwoDOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- // Only the first register actually goes on the instruction. The rest
- // are implied by the opcode.
- Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
- }
-
- void addVecListThreeDOperands(MCInst &Inst, unsigned N) const {
+ void addVecListOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- // Only the first register actually goes on the instruction. The rest
- // are implied by the opcode.
Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
}
- void addVecListFourDOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- // Only the first register actually goes on the instruction. The rest
- // are implied by the opcode.
- Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
- }
-
- void addVecListTwoQOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- // Only the first register actually goes on the instruction. The rest
- // are implied by the opcode.
+ void addVecListIndexedOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
+ Inst.addOperand(MCOperand::CreateImm(VectorList.LaneIndex));
}
void addVectorIndex8Operands(MCInst &Inst, unsigned N) const {
@@ -1780,10 +1953,32 @@ public:
}
static ARMOperand *CreateVectorList(unsigned RegNum, unsigned Count,
- SMLoc S, SMLoc E) {
+ bool isDoubleSpaced, SMLoc S, SMLoc E) {
ARMOperand *Op = new ARMOperand(k_VectorList);
Op->VectorList.RegNum = RegNum;
Op->VectorList.Count = Count;
+ Op->VectorList.isDoubleSpaced = isDoubleSpaced;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateVectorListAllLanes(unsigned RegNum, unsigned Count,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_VectorListAllLanes);
+ Op->VectorList.RegNum = RegNum;
+ Op->VectorList.Count = Count;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateVectorListIndexed(unsigned RegNum, unsigned Count,
+ unsigned Index, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_VectorListIndexed);
+ Op->VectorList.RegNum = RegNum;
+ Op->VectorList.Count = Count;
+ Op->VectorList.LaneIndex = Index;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
@@ -1982,6 +2177,14 @@ void ARMOperand::print(raw_ostream &OS) const {
OS << "<vector_list " << VectorList.Count << " * "
<< VectorList.RegNum << ">";
break;
+ case k_VectorListAllLanes:
+ OS << "<vector_list(all lanes) " << VectorList.Count << " * "
+ << VectorList.RegNum << ">";
+ break;
+ case k_VectorListIndexed:
+ OS << "<vector_list(lane " << VectorList.LaneIndex << ") "
+ << VectorList.Count << " * " << VectorList.RegNum << ">";
+ break;
case k_Token:
OS << "'" << getToken() << "'";
break;
@@ -2000,7 +2203,9 @@ static unsigned MatchRegisterName(StringRef Name);
bool ARMAsmParser::ParseRegister(unsigned &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) {
+ StartLoc = Parser.getTok().getLoc();
RegNo = tryParseRegister();
+ EndLoc = Parser.getTok().getLoc();
return (RegNo == (unsigned)-1);
}
@@ -2013,8 +2218,6 @@ int ARMAsmParser::tryParseRegister() {
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier)) return -1;
- // FIXME: Validate register for the current architecture; we have to do
- // validation later, so maybe there is no need for this here.
std::string lowerCase = Tok.getString().lower();
unsigned RegNum = MatchRegisterName(lowerCase);
if (!RegNum) {
@@ -2023,9 +2226,34 @@ int ARMAsmParser::tryParseRegister() {
.Case("r14", ARM::LR)
.Case("r15", ARM::PC)
.Case("ip", ARM::R12)
+ // Additional register name aliases for 'gas' compatibility.
+ .Case("a1", ARM::R0)
+ .Case("a2", ARM::R1)
+ .Case("a3", ARM::R2)
+ .Case("a4", ARM::R3)
+ .Case("v1", ARM::R4)
+ .Case("v2", ARM::R5)
+ .Case("v3", ARM::R6)
+ .Case("v4", ARM::R7)
+ .Case("v5", ARM::R8)
+ .Case("v6", ARM::R9)
+ .Case("v7", ARM::R10)
+ .Case("v8", ARM::R11)
+ .Case("sb", ARM::R9)
+ .Case("sl", ARM::R10)
+ .Case("fp", ARM::R11)
.Default(0);
}
- if (!RegNum) return -1;
+ if (!RegNum) {
+ // Check for aliases registered via .req.
+ StringMap<unsigned>::const_iterator Entry =
+ RegisterReqs.find(Tok.getIdentifier());
+ // If no match, return failure.
+ if (Entry == RegisterReqs.end())
+ return -1;
+ Parser.Lex(); // Eat identifier token.
+ return Entry->getValue();
+ }
Parser.Lex(); // Eat identifier token.
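
The .req support above only consults the user-defined alias map after the builtin register names fail to match. A self-contained sketch of that lookup order, using std::unordered_map in place of llvm::StringMap:

    // ".req" lookup order: builtin names first, then user-defined aliases.
    #include <cassert>
    #include <optional>
    #include <string>
    #include <unordered_map>

    static std::unordered_map<std::string, unsigned> RegisterReqs; // "foo" -> reg number

    static std::optional<unsigned> matchBuiltin(const std::string &Name) {
      if (Name == "r0") return 0u;
      if (Name == "sp") return 13u;
      if (Name == "lr") return 14u;
      if (Name == "pc") return 15u;
      return std::nullopt; // the real parser knows many more names
    }

    static std::optional<unsigned> tryParseRegister(const std::string &Name) {
      if (auto R = matchBuiltin(Name))
        return R;
      auto It = RegisterReqs.find(Name); // fall back to ".req" aliases
      if (It != RegisterReqs.end())
        return It->second;
      return std::nullopt;
    }

    int main() {
      RegisterReqs["foo"] = 4; // "foo .req r4"
      assert(*tryParseRegister("foo") == 4);
      assert(!tryParseRegister("bar").has_value());
    }
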
@@ -2045,6 +2273,7 @@ int ARMAsmParser::tryParseShiftRegister(
std::string lowerCase = Tok.getString().lower();
ARM_AM::ShiftOpc ShiftTy = StringSwitch<ARM_AM::ShiftOpc>(lowerCase)
+ .Case("asl", ARM_AM::lsl)
.Case("lsl", ARM_AM::lsl)
.Case("lsr", ARM_AM::lsr)
.Case("asr", ARM_AM::asr)
@@ -2073,7 +2302,8 @@ int ARMAsmParser::tryParseShiftRegister(
ShiftReg = SrcReg;
} else {
// Figure out if this is shifted by a constant or a register (for non-RRX).
- if (Parser.getTok().is(AsmToken::Hash)) {
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar)) {
Parser.Lex(); // Eat hash.
SMLoc ImmLoc = Parser.getTok().getLoc();
const MCExpr *ShiftExpr = 0;
@@ -2446,6 +2676,7 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the comma.
RegLoc = Parser.getTok().getLoc();
int OldReg = Reg;
+ const AsmToken RegTok = Parser.getTok();
Reg = tryParseRegister();
if (Reg == -1)
return Error(RegLoc, "register expected");
@@ -2459,8 +2690,13 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (!RC->contains(Reg))
return Error(RegLoc, "invalid register in register list");
// List must be monotonically increasing.
- if (getARMRegisterNumbering(Reg) <= getARMRegisterNumbering(OldReg))
+ if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg))
return Error(RegLoc, "register list not in ascending order");
+ if (getARMRegisterNumbering(Reg) == getARMRegisterNumbering(OldReg)) {
+ Warning(RegLoc, "duplicated register (" + RegTok.getString() +
+ ") in register list");
+ continue;
+ }
// VFP register lists must also be contiguous.
// It's OK to use the enumeration values directly here, as the
// VFP register classes have the enum sorted properly.
@@ -2477,13 +2713,55 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return Error(E, "'}' expected");
Parser.Lex(); // Eat '}' token.
+ // Push the register list operand.
Operands.push_back(ARMOperand::CreateRegList(Registers, S, E));
+
+ // The ARM system instruction variants for LDM/STM have a '^' token here.
+ if (Parser.getTok().is(AsmToken::Caret)) {
+ Operands.push_back(ARMOperand::CreateToken("^",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat '^' token.
+ }
+
return false;
}
+// Helper function to parse the lane index for vector lists.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) {
+ Index = 0; // Always return a defined index value.
+ if (Parser.getTok().is(AsmToken::LBrac)) {
+ Parser.Lex(); // Eat the '['.
+ if (Parser.getTok().is(AsmToken::RBrac)) {
+ // "Dn[]" is the 'all lanes' syntax.
+ LaneKind = AllLanes;
+ Parser.Lex(); // Eat the ']'.
+ return MatchOperand_Success;
+ }
+ if (Parser.getTok().is(AsmToken::Integer)) {
+ int64_t Val = Parser.getTok().getIntVal();
+ // Make this range check context sensitive for .8, .16, .32.
+ if (Val < 0 || Val > 7)
+ Error(Parser.getTok().getLoc(), "lane index out of range");
+ Index = Val;
+ LaneKind = IndexedLane;
+ Parser.Lex(); // Eat the token.
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ Error(Parser.getTok().getLoc(), "']' expected");
+ Parser.Lex(); // Eat the ']'.
+ return MatchOperand_Success;
+ }
+ Error(Parser.getTok().getLoc(), "lane index must be empty or an integer");
+ return MatchOperand_ParseFail;
+ }
+ LaneKind = NoLanes;
+ return MatchOperand_Success;
+}
+
// parse a vector register list
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ VectorLaneTy LaneKind;
+ unsigned LaneIndex;
SMLoc S = Parser.getTok().getLoc();
// As an extension (to match gas), support a plain D register or Q register
// (without enclosing curly braces) as a single or double entry list,
@@ -2494,12 +2772,48 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_NoMatch;
SMLoc E = Parser.getTok().getLoc();
if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) {
- Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, S, E));
+ OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex);
+ if (Res != MatchOperand_Success)
+ return Res;
+ switch (LaneKind) {
+ default:
+ assert(0 && "unexpected lane kind!");
+ case NoLanes:
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, false, S, E));
+ break;
+ case AllLanes:
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 1, S, E));
+ break;
+ case IndexedLane:
+ Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 1,
+ LaneIndex, S,E));
+ break;
+ }
return MatchOperand_Success;
}
if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
Reg = getDRegFromQReg(Reg);
- Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, S, E));
+ OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex);
+ if (Res != MatchOperand_Success)
+ return Res;
+ switch (LaneKind) {
+ default:
+ assert(0 && "unexpected lane kind!");
+ case NoLanes:
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E));
+ break;
+ case AllLanes:
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, S, E));
+ break;
+ case IndexedLane:
+ Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 2,
+ LaneIndex, S,E));
+ break;
+ }
return MatchOperand_Success;
}
Error(S, "vector register expected");
@@ -2518,18 +2832,30 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_ParseFail;
}
unsigned Count = 1;
+ int Spacing = 0;
unsigned FirstReg = Reg;
// The list is of D registers, but we also allow Q regs and just interpret
// them as the two D sub-registers.
if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
FirstReg = Reg = getDRegFromQReg(Reg);
+ Spacing = 1; // double-spacing requires explicit D registers, otherwise
+ // it's ambiguous with four-register single spaced.
++Reg;
++Count;
}
+ if (parseVectorLane(LaneKind, LaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
while (Parser.getTok().is(AsmToken::Comma) ||
Parser.getTok().is(AsmToken::Minus)) {
if (Parser.getTok().is(AsmToken::Minus)) {
+ if (!Spacing)
+ Spacing = 1; // Register range implies a single spaced list.
+ else if (Spacing == 2) {
+ Error(Parser.getTok().getLoc(),
+ "sequential registers in double spaced list");
+ return MatchOperand_ParseFail;
+ }
Parser.Lex(); // Eat the minus.
SMLoc EndLoc = Parser.getTok().getLoc();
int EndReg = tryParseRegister();
@@ -2554,6 +2880,16 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Error(EndLoc, "bad range in register list");
return MatchOperand_ParseFail;
}
+ // Parse the lane specifier if present.
+ VectorLaneTy NextLaneKind;
+ unsigned NextLaneIndex;
+ if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
+ Error(EndLoc, "mismatched lane index in register list");
+ return MatchOperand_ParseFail;
+ }
+ EndLoc = Parser.getTok().getLoc();
// Add all the registers in the range to the register list.
Count += EndReg - Reg;
@@ -2575,6 +2911,13 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// The list is of D registers, but we also allow Q regs and just interpret
// them as the two D sub-registers.
if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ if (!Spacing)
+ Spacing = 1; // Register range implies a single spaced list.
+ else if (Spacing == 2) {
+ Error(RegLoc,
+ "invalid register in double-spaced list (must be 'D' register')");
+ return MatchOperand_ParseFail;
+ }
Reg = getDRegFromQReg(Reg);
if (Reg != OldReg + 1) {
Error(RegLoc, "non-contiguous register range");
@@ -2582,14 +2925,45 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
++Reg;
Count += 2;
+ // Parse the lane specifier if present.
+ VectorLaneTy NextLaneKind;
+ unsigned NextLaneIndex;
+ SMLoc EndLoc = Parser.getTok().getLoc();
+ if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
+ Error(EndLoc, "mismatched lane index in register list");
+ return MatchOperand_ParseFail;
+ }
continue;
}
- // Normal D register. Just check that it's contiguous and keep going.
- if (Reg != OldReg + 1) {
+ // Normal D register.
+ // Figure out the register spacing (single or double) of the list if
+ // we don't know it already.
+ if (!Spacing)
+ Spacing = 1 + (Reg == OldReg + 2);
+
+ // Just check that it's contiguous and keep going.
+ if (Reg != OldReg + Spacing) {
Error(RegLoc, "non-contiguous register range");
return MatchOperand_ParseFail;
}
++Count;
+ // Parse the lane specifier if present.
+ VectorLaneTy NextLaneKind;
+ unsigned NextLaneIndex;
+ SMLoc EndLoc = Parser.getTok().getLoc();
+ if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
+ Error(EndLoc, "mismatched lane index in register list");
+ return MatchOperand_ParseFail;
+ }
+ if (Spacing == 2 && LaneKind != NoLanes) {
+ Error(EndLoc,
+ "lane index specfier invalid in double spaced register list");
+ return MatchOperand_ParseFail;
+ }
}
SMLoc E = Parser.getTok().getLoc();
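
The spacing logic above infers single vs. double spacing from the gap between the first two D registers in the list and then requires every later register to keep the same stride. A small standalone sketch of that rule over bare register numbers:

    // {d0,d1,d2} is single spaced, {d0,d2,d4} is double spaced; any other
    // stride is rejected as a non-contiguous register range.
    #include <cassert>
    #include <vector>

    // Returns the inferred spacing (1 or 2), or 0 if the list is malformed.
    static int inferSpacing(const std::vector<int> &DRegs) {
      if (DRegs.size() < 2)
        return 1; // a single register is trivially single spaced
      int Spacing = (DRegs[1] == DRegs[0] + 2) ? 2 : 1;
      for (std::size_t I = 1; I < DRegs.size(); ++I)
        if (DRegs[I] != DRegs[I - 1] + Spacing)
          return 0; // non-contiguous register range
      return Spacing;
    }

    int main() {
      assert(inferSpacing({0, 1, 2, 3}) == 1); // {d0,d1,d2,d3}
      assert(inferSpacing({0, 2, 4}) == 2);    // {d0,d2,d4}
      assert(inferSpacing({0, 3}) == 0);       // rejected
    }
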
@@ -2599,7 +2973,22 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
Parser.Lex(); // Eat '}' token.
- Operands.push_back(ARMOperand::CreateVectorList(FirstReg, Count, S, E));
+ switch (LaneKind) {
+ default:
+ assert(0 && "unexpected lane kind in register list.");
+ case NoLanes:
+ Operands.push_back(ARMOperand::CreateVectorList(FirstReg, Count,
+ (Spacing == 2), S, E));
+ break;
+ case AllLanes:
+ Operands.push_back(ARMOperand::CreateVectorListAllLanes(FirstReg, Count,
+ S, E));
+ break;
+ case IndexedLane:
+ Operands.push_back(ARMOperand::CreateVectorListIndexed(FirstReg, Count,
+ LaneIndex, S, E));
+ break;
+ }
return MatchOperand_Success;
}
@@ -2786,7 +3175,8 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
Parser.Lex(); // Eat shift type token.
// There must be a '#' and a shift amount.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2864,7 +3254,8 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the operator.
// A '#' and a shift amount.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2924,7 +3315,8 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the operator.
// A '#' and a rotate amount.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2961,7 +3353,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
// The bitfield descriptor is really two operands, the LSB and the width.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2993,7 +3386,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_ParseFail;
}
Parser.Lex(); // Eat hash token.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -3087,7 +3481,8 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Tok.getLoc();
// Do immediates first, as we always parse those if we have a '#'.
- if (Parser.getTok().is(AsmToken::Hash)) {
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar)) {
Parser.Lex(); // Eat the '#'.
// Explicitly look for a '-', as we need to encode negative zero
// differently.
@@ -3444,7 +3839,7 @@ bool ARMAsmParser::
cvtVLDwbFixed(MCInst &Inst, unsigned Opcode,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Vd
- ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
// Vn
@@ -3458,7 +3853,7 @@ bool ARMAsmParser::
cvtVLDwbRegister(MCInst &Inst, unsigned Opcode,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Vd
- ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
// Vn
@@ -3478,7 +3873,7 @@ cvtVSTwbFixed(MCInst &Inst, unsigned Opcode,
// Vn
((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
// Vt
- ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
return true;
@@ -3494,7 +3889,7 @@ cvtVSTwbRegister(MCInst &Inst, unsigned Opcode,
// Vm
((ARMOperand*)Operands[5])->addRegOperands(Inst, 1);
// Vt
- ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
return true;
@@ -3591,8 +3986,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// offset. Be friendly and also accept a plain integer (without a leading
// hash) for gas compatibility.
if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar) ||
Parser.getTok().is(AsmToken::Integer)) {
- if (Parser.getTok().is(AsmToken::Hash))
+ if (Parser.getTok().isNot(AsmToken::Integer))
Parser.Lex(); // Eat the '#'.
E = Parser.getTok().getLoc();
@@ -3690,7 +4086,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
if (Tok.isNot(AsmToken::Identifier))
return true;
StringRef ShiftName = Tok.getString();
- if (ShiftName == "lsl" || ShiftName == "LSL")
+ if (ShiftName == "lsl" || ShiftName == "LSL" ||
+ ShiftName == "asl" || ShiftName == "ASL")
St = ARM_AM::lsl;
else if (ShiftName == "lsr" || ShiftName == "LSR")
St = ARM_AM::lsr;
@@ -3710,7 +4107,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
Loc = Parser.getTok().getLoc();
// A '#' and a shift amount.
const AsmToken &HashTok = Parser.getTok();
- if (HashTok.isNot(AsmToken::Hash))
+ if (HashTok.isNot(AsmToken::Hash) &&
+ HashTok.isNot(AsmToken::Dollar))
return Error(HashTok.getLoc(), "'#' expected");
Parser.Lex(); // Eat hash token.
@@ -3739,7 +4137,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
- if (Parser.getTok().isNot(AsmToken::Hash))
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar))
return MatchOperand_NoMatch;
// Disambiguate the VMOV forms that can accept an FP immediate.
@@ -3852,6 +4251,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
return parseMemory(Operands);
case AsmToken::LCurly:
return parseRegisterList(Operands);
+ case AsmToken::Dollar:
case AsmToken::Hash: {
// #42 -> immediate.
// TODO: ":lower16:" and ":upper16:" modifiers after # before immediate
@@ -3990,7 +4390,9 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "mrs" || Mnemonic == "smmls" || Mnemonic == "vabs" ||
Mnemonic == "vcls" || Mnemonic == "vmls" || Mnemonic == "vmrs" ||
Mnemonic == "vnmls" || Mnemonic == "vqabs" || Mnemonic == "vrecps" ||
- Mnemonic == "vrsqrts" || Mnemonic == "srs" ||
+ Mnemonic == "vrsqrts" || Mnemonic == "srs" || Mnemonic == "flds" ||
+ Mnemonic == "fmrs" || Mnemonic == "fsqrts" || Mnemonic == "fsubs" ||
+ Mnemonic == "fsts" ||
(Mnemonic == "movs" && isThumb()))) {
Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1);
CarrySetting = true;
@@ -4206,9 +4608,27 @@ static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) {
return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm");
}
+static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features);
/// Parse an arm instruction mnemonic followed by its operands.
bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Apply mnemonic aliases before doing anything else, as the destination
+ // mnemonic may include suffixes and we want to handle them normally.
+ // The generic tblgen'erated code does this later, at the start of
+ // MatchInstructionImpl(), but that's too late for aliases that include
+ // any sort of suffix.
+ unsigned AvailableFeatures = getAvailableFeatures();
+ applyMnemonicAliases(Name, AvailableFeatures);
+
+ // First check for the ARM-specific .req directive.
+ if (Parser.getTok().is(AsmToken::Identifier) &&
+ Parser.getTok().getIdentifier() == ".req") {
+ parseDirectiveReq(Name, NameLoc);
+ // We always return 'error' for this, as we're done with this
+ // statement and don't need to match the instruction.
+ return true;
+ }
+
// Create the leading tokens for the mnemonic, split by '.' characters.
size_t Start = 0, Next = Name.find('.');
StringRef Mnemonic = Name.slice(Start, Next);
@@ -4400,12 +4820,21 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
}
}
// Similarly, the Thumb1 "RSB" instruction has a literal "#0" on the
- // end. Convert it to a token here.
+ // end. Convert it to a token here. Take care not to convert those
+ // that should hit the Thumb2 encoding.
if (Mnemonic == "rsb" && isThumb() && Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
static_cast<ARMOperand*>(Operands[5])->isImm()) {
ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]);
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
- if (CE && CE->getValue() == 0) {
+ if (CE && CE->getValue() == 0 &&
+ (isThumbOne() ||
+ // The cc_out operand matches the IT block.
+ ((inITBlock() != CarrySetting) &&
+ // Neither register operand is a high register.
+ (isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) &&
+ isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()))))){
Operands.erase(Operands.begin() + 5);
Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
delete Op;
@@ -4605,11 +5034,495 @@ validateInstruction(MCInst &Inst,
return false;
}
+static unsigned getRealVSTLNOpcode(unsigned Opc) {
+ switch(Opc) {
+ default: llvm_unreachable("unexpected opcode!");
+ // VST1LN
+ case ARM::VST1LNdWB_fixed_Asm_8: case ARM::VST1LNdWB_fixed_Asm_P8:
+ case ARM::VST1LNdWB_fixed_Asm_I8: case ARM::VST1LNdWB_fixed_Asm_S8:
+ case ARM::VST1LNdWB_fixed_Asm_U8:
+ return ARM::VST1LNd8_UPD;
+ case ARM::VST1LNdWB_fixed_Asm_16: case ARM::VST1LNdWB_fixed_Asm_P16:
+ case ARM::VST1LNdWB_fixed_Asm_I16: case ARM::VST1LNdWB_fixed_Asm_S16:
+ case ARM::VST1LNdWB_fixed_Asm_U16:
+ return ARM::VST1LNd16_UPD;
+ case ARM::VST1LNdWB_fixed_Asm_32: case ARM::VST1LNdWB_fixed_Asm_F:
+ case ARM::VST1LNdWB_fixed_Asm_F32: case ARM::VST1LNdWB_fixed_Asm_I32:
+ case ARM::VST1LNdWB_fixed_Asm_S32: case ARM::VST1LNdWB_fixed_Asm_U32:
+ return ARM::VST1LNd32_UPD;
+ case ARM::VST1LNdWB_register_Asm_8: case ARM::VST1LNdWB_register_Asm_P8:
+ case ARM::VST1LNdWB_register_Asm_I8: case ARM::VST1LNdWB_register_Asm_S8:
+ case ARM::VST1LNdWB_register_Asm_U8:
+ return ARM::VST1LNd8_UPD;
+ case ARM::VST1LNdWB_register_Asm_16: case ARM::VST1LNdWB_register_Asm_P16:
+ case ARM::VST1LNdWB_register_Asm_I16: case ARM::VST1LNdWB_register_Asm_S16:
+ case ARM::VST1LNdWB_register_Asm_U16:
+ return ARM::VST1LNd16_UPD;
+ case ARM::VST1LNdWB_register_Asm_32: case ARM::VST1LNdWB_register_Asm_F:
+ case ARM::VST1LNdWB_register_Asm_F32: case ARM::VST1LNdWB_register_Asm_I32:
+ case ARM::VST1LNdWB_register_Asm_S32: case ARM::VST1LNdWB_register_Asm_U32:
+ return ARM::VST1LNd32_UPD;
+ case ARM::VST1LNdAsm_8: case ARM::VST1LNdAsm_P8:
+ case ARM::VST1LNdAsm_I8: case ARM::VST1LNdAsm_S8:
+ case ARM::VST1LNdAsm_U8:
+ return ARM::VST1LNd8;
+ case ARM::VST1LNdAsm_16: case ARM::VST1LNdAsm_P16:
+ case ARM::VST1LNdAsm_I16: case ARM::VST1LNdAsm_S16:
+ case ARM::VST1LNdAsm_U16:
+ return ARM::VST1LNd16;
+ case ARM::VST1LNdAsm_32: case ARM::VST1LNdAsm_F:
+ case ARM::VST1LNdAsm_F32: case ARM::VST1LNdAsm_I32:
+ case ARM::VST1LNdAsm_S32: case ARM::VST1LNdAsm_U32:
+ return ARM::VST1LNd32;
+
+ // VST2LN
+ case ARM::VST2LNdWB_fixed_Asm_8: case ARM::VST2LNdWB_fixed_Asm_P8:
+ case ARM::VST2LNdWB_fixed_Asm_I8: case ARM::VST2LNdWB_fixed_Asm_S8:
+ case ARM::VST2LNdWB_fixed_Asm_U8:
+ return ARM::VST2LNd8_UPD;
+ case ARM::VST2LNdWB_fixed_Asm_16: case ARM::VST2LNdWB_fixed_Asm_P16:
+ case ARM::VST2LNdWB_fixed_Asm_I16: case ARM::VST2LNdWB_fixed_Asm_S16:
+ case ARM::VST2LNdWB_fixed_Asm_U16:
+ return ARM::VST2LNd16_UPD;
+ case ARM::VST2LNdWB_fixed_Asm_32: case ARM::VST2LNdWB_fixed_Asm_F:
+ case ARM::VST2LNdWB_fixed_Asm_F32: case ARM::VST2LNdWB_fixed_Asm_I32:
+ case ARM::VST2LNdWB_fixed_Asm_S32: case ARM::VST2LNdWB_fixed_Asm_U32:
+ return ARM::VST2LNd32_UPD;
+ case ARM::VST2LNdWB_register_Asm_8: case ARM::VST2LNdWB_register_Asm_P8:
+ case ARM::VST2LNdWB_register_Asm_I8: case ARM::VST2LNdWB_register_Asm_S8:
+ case ARM::VST2LNdWB_register_Asm_U8:
+ return ARM::VST2LNd8_UPD;
+ case ARM::VST2LNdWB_register_Asm_16: case ARM::VST2LNdWB_register_Asm_P16:
+ case ARM::VST2LNdWB_register_Asm_I16: case ARM::VST2LNdWB_register_Asm_S16:
+ case ARM::VST2LNdWB_register_Asm_U16:
+ return ARM::VST2LNd16_UPD;
+ case ARM::VST2LNdWB_register_Asm_32: case ARM::VST2LNdWB_register_Asm_F:
+ case ARM::VST2LNdWB_register_Asm_F32: case ARM::VST2LNdWB_register_Asm_I32:
+ case ARM::VST2LNdWB_register_Asm_S32: case ARM::VST2LNdWB_register_Asm_U32:
+ return ARM::VST2LNd32_UPD;
+ case ARM::VST2LNdAsm_8: case ARM::VST2LNdAsm_P8:
+ case ARM::VST2LNdAsm_I8: case ARM::VST2LNdAsm_S8:
+ case ARM::VST2LNdAsm_U8:
+ return ARM::VST2LNd8;
+ case ARM::VST2LNdAsm_16: case ARM::VST2LNdAsm_P16:
+ case ARM::VST2LNdAsm_I16: case ARM::VST2LNdAsm_S16:
+ case ARM::VST2LNdAsm_U16:
+ return ARM::VST2LNd16;
+ case ARM::VST2LNdAsm_32: case ARM::VST2LNdAsm_F:
+ case ARM::VST2LNdAsm_F32: case ARM::VST2LNdAsm_I32:
+ case ARM::VST2LNdAsm_S32: case ARM::VST2LNdAsm_U32:
+ return ARM::VST2LNd32;
+ }
+}
+
+static unsigned getRealVLDLNOpcode(unsigned Opc) {
+ switch(Opc) {
+ default: llvm_unreachable("unexpected opcode!");
+ // VLD1LN
+ case ARM::VLD1LNdWB_fixed_Asm_8: case ARM::VLD1LNdWB_fixed_Asm_P8:
+ case ARM::VLD1LNdWB_fixed_Asm_I8: case ARM::VLD1LNdWB_fixed_Asm_S8:
+ case ARM::VLD1LNdWB_fixed_Asm_U8:
+ return ARM::VLD1LNd8_UPD;
+ case ARM::VLD1LNdWB_fixed_Asm_16: case ARM::VLD1LNdWB_fixed_Asm_P16:
+ case ARM::VLD1LNdWB_fixed_Asm_I16: case ARM::VLD1LNdWB_fixed_Asm_S16:
+ case ARM::VLD1LNdWB_fixed_Asm_U16:
+ return ARM::VLD1LNd16_UPD;
+ case ARM::VLD1LNdWB_fixed_Asm_32: case ARM::VLD1LNdWB_fixed_Asm_F:
+ case ARM::VLD1LNdWB_fixed_Asm_F32: case ARM::VLD1LNdWB_fixed_Asm_I32:
+ case ARM::VLD1LNdWB_fixed_Asm_S32: case ARM::VLD1LNdWB_fixed_Asm_U32:
+ return ARM::VLD1LNd32_UPD;
+ case ARM::VLD1LNdWB_register_Asm_8: case ARM::VLD1LNdWB_register_Asm_P8:
+ case ARM::VLD1LNdWB_register_Asm_I8: case ARM::VLD1LNdWB_register_Asm_S8:
+ case ARM::VLD1LNdWB_register_Asm_U8:
+ return ARM::VLD1LNd8_UPD;
+ case ARM::VLD1LNdWB_register_Asm_16: case ARM::VLD1LNdWB_register_Asm_P16:
+ case ARM::VLD1LNdWB_register_Asm_I16: case ARM::VLD1LNdWB_register_Asm_S16:
+ case ARM::VLD1LNdWB_register_Asm_U16:
+ return ARM::VLD1LNd16_UPD;
+ case ARM::VLD1LNdWB_register_Asm_32: case ARM::VLD1LNdWB_register_Asm_F:
+ case ARM::VLD1LNdWB_register_Asm_F32: case ARM::VLD1LNdWB_register_Asm_I32:
+ case ARM::VLD1LNdWB_register_Asm_S32: case ARM::VLD1LNdWB_register_Asm_U32:
+ return ARM::VLD1LNd32_UPD;
+ case ARM::VLD1LNdAsm_8: case ARM::VLD1LNdAsm_P8:
+ case ARM::VLD1LNdAsm_I8: case ARM::VLD1LNdAsm_S8:
+ case ARM::VLD1LNdAsm_U8:
+ return ARM::VLD1LNd8;
+ case ARM::VLD1LNdAsm_16: case ARM::VLD1LNdAsm_P16:
+ case ARM::VLD1LNdAsm_I16: case ARM::VLD1LNdAsm_S16:
+ case ARM::VLD1LNdAsm_U16:
+ return ARM::VLD1LNd16;
+ case ARM::VLD1LNdAsm_32: case ARM::VLD1LNdAsm_F:
+ case ARM::VLD1LNdAsm_F32: case ARM::VLD1LNdAsm_I32:
+ case ARM::VLD1LNdAsm_S32: case ARM::VLD1LNdAsm_U32:
+ return ARM::VLD1LNd32;
+
+ // VLD2LN
+ case ARM::VLD2LNdWB_fixed_Asm_8: case ARM::VLD2LNdWB_fixed_Asm_P8:
+ case ARM::VLD2LNdWB_fixed_Asm_I8: case ARM::VLD2LNdWB_fixed_Asm_S8:
+ case ARM::VLD2LNdWB_fixed_Asm_U8:
+ return ARM::VLD2LNd8_UPD;
+ case ARM::VLD2LNdWB_fixed_Asm_16: case ARM::VLD2LNdWB_fixed_Asm_P16:
+ case ARM::VLD2LNdWB_fixed_Asm_I16: case ARM::VLD2LNdWB_fixed_Asm_S16:
+ case ARM::VLD2LNdWB_fixed_Asm_U16:
+ return ARM::VLD2LNd16_UPD;
+ case ARM::VLD2LNdWB_fixed_Asm_32: case ARM::VLD2LNdWB_fixed_Asm_F:
+ case ARM::VLD2LNdWB_fixed_Asm_F32: case ARM::VLD2LNdWB_fixed_Asm_I32:
+ case ARM::VLD2LNdWB_fixed_Asm_S32: case ARM::VLD2LNdWB_fixed_Asm_U32:
+ return ARM::VLD2LNd32_UPD;
+ case ARM::VLD2LNdWB_register_Asm_8: case ARM::VLD2LNdWB_register_Asm_P8:
+ case ARM::VLD2LNdWB_register_Asm_I8: case ARM::VLD2LNdWB_register_Asm_S8:
+ case ARM::VLD2LNdWB_register_Asm_U8:
+ return ARM::VLD2LNd8_UPD;
+ case ARM::VLD2LNdWB_register_Asm_16: case ARM::VLD2LNdWB_register_Asm_P16:
+ case ARM::VLD2LNdWB_register_Asm_I16: case ARM::VLD2LNdWB_register_Asm_S16:
+ case ARM::VLD2LNdWB_register_Asm_U16:
+ return ARM::VLD2LNd16_UPD;
+ case ARM::VLD2LNdWB_register_Asm_32: case ARM::VLD2LNdWB_register_Asm_F:
+ case ARM::VLD2LNdWB_register_Asm_F32: case ARM::VLD2LNdWB_register_Asm_I32:
+ case ARM::VLD2LNdWB_register_Asm_S32: case ARM::VLD2LNdWB_register_Asm_U32:
+ return ARM::VLD2LNd32_UPD;
+ case ARM::VLD2LNdAsm_8: case ARM::VLD2LNdAsm_P8:
+ case ARM::VLD2LNdAsm_I8: case ARM::VLD2LNdAsm_S8:
+ case ARM::VLD2LNdAsm_U8:
+ return ARM::VLD2LNd8;
+ case ARM::VLD2LNdAsm_16: case ARM::VLD2LNdAsm_P16:
+ case ARM::VLD2LNdAsm_I16: case ARM::VLD2LNdAsm_S16:
+ case ARM::VLD2LNdAsm_U16:
+ return ARM::VLD2LNd16;
+ case ARM::VLD2LNdAsm_32: case ARM::VLD2LNdAsm_F:
+ case ARM::VLD2LNdAsm_F32: case ARM::VLD2LNdAsm_I32:
+ case ARM::VLD2LNdAsm_S32: case ARM::VLD2LNdAsm_U32:
+ return ARM::VLD2LNd32;
+ }
+}
+
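These helper tables map the assembly-only pseudo opcodes back onto the real single-lane VST1/VST2 and VLD1/VLD2 instructions; processInstruction below then reshuffles the operands into the order the real instructions expect. Illustrative inputs exercising the three addressing forms handled here (register and lane choices are made up, not from this patch's tests):

    vst1.8  {d4[2]}, [r1]             @ single-lane store, no writeback
    vst1.8  {d4[2]}, [r1]!            @ fixed post-increment by the access size
    vld2.16 {d0[1], d1[1]}, [r2], r3  @ register post-increment by r3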
bool ARMAsmParser::
processInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
switch (Inst.getOpcode()) {
- // Handle the MOV complex aliases.
+ // Handle NEON VST complex aliases.
+ case ARM::VST1LNdWB_register_Asm_8: case ARM::VST1LNdWB_register_Asm_P8:
+ case ARM::VST1LNdWB_register_Asm_I8: case ARM::VST1LNdWB_register_Asm_S8:
+ case ARM::VST1LNdWB_register_Asm_U8: case ARM::VST1LNdWB_register_Asm_16:
+ case ARM::VST1LNdWB_register_Asm_P16: case ARM::VST1LNdWB_register_Asm_I16:
+ case ARM::VST1LNdWB_register_Asm_S16: case ARM::VST1LNdWB_register_Asm_U16:
+ case ARM::VST1LNdWB_register_Asm_32: case ARM::VST1LNdWB_register_Asm_F:
+ case ARM::VST1LNdWB_register_Asm_F32: case ARM::VST1LNdWB_register_Asm_I32:
+ case ARM::VST1LNdWB_register_Asm_S32: case ARM::VST1LNdWB_register_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST2LNdWB_register_Asm_8: case ARM::VST2LNdWB_register_Asm_P8:
+ case ARM::VST2LNdWB_register_Asm_I8: case ARM::VST2LNdWB_register_Asm_S8:
+ case ARM::VST2LNdWB_register_Asm_U8: case ARM::VST2LNdWB_register_Asm_16:
+ case ARM::VST2LNdWB_register_Asm_P16: case ARM::VST2LNdWB_register_Asm_I16:
+ case ARM::VST2LNdWB_register_Asm_S16: case ARM::VST2LNdWB_register_Asm_U16:
+ case ARM::VST2LNdWB_register_Asm_32: case ARM::VST2LNdWB_register_Asm_F:
+ case ARM::VST2LNdWB_register_Asm_F32: case ARM::VST2LNdWB_register_Asm_I32:
+ case ARM::VST2LNdWB_register_Asm_S32: case ARM::VST2LNdWB_register_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::VST1LNdWB_fixed_Asm_8: case ARM::VST1LNdWB_fixed_Asm_P8:
+ case ARM::VST1LNdWB_fixed_Asm_I8: case ARM::VST1LNdWB_fixed_Asm_S8:
+ case ARM::VST1LNdWB_fixed_Asm_U8: case ARM::VST1LNdWB_fixed_Asm_16:
+ case ARM::VST1LNdWB_fixed_Asm_P16: case ARM::VST1LNdWB_fixed_Asm_I16:
+ case ARM::VST1LNdWB_fixed_Asm_S16: case ARM::VST1LNdWB_fixed_Asm_U16:
+ case ARM::VST1LNdWB_fixed_Asm_32: case ARM::VST1LNdWB_fixed_Asm_F:
+ case ARM::VST1LNdWB_fixed_Asm_F32: case ARM::VST1LNdWB_fixed_Asm_I32:
+ case ARM::VST1LNdWB_fixed_Asm_S32: case ARM::VST1LNdWB_fixed_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST2LNdWB_fixed_Asm_8: case ARM::VST2LNdWB_fixed_Asm_P8:
+ case ARM::VST2LNdWB_fixed_Asm_I8: case ARM::VST2LNdWB_fixed_Asm_S8:
+ case ARM::VST2LNdWB_fixed_Asm_U8: case ARM::VST2LNdWB_fixed_Asm_16:
+ case ARM::VST2LNdWB_fixed_Asm_P16: case ARM::VST2LNdWB_fixed_Asm_I16:
+ case ARM::VST2LNdWB_fixed_Asm_S16: case ARM::VST2LNdWB_fixed_Asm_U16:
+ case ARM::VST2LNdWB_fixed_Asm_32: case ARM::VST2LNdWB_fixed_Asm_F:
+ case ARM::VST2LNdWB_fixed_Asm_F32: case ARM::VST2LNdWB_fixed_Asm_I32:
+ case ARM::VST2LNdWB_fixed_Asm_S32: case ARM::VST2LNdWB_fixed_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::VST1LNdAsm_8: case ARM::VST1LNdAsm_P8: case ARM::VST1LNdAsm_I8:
+ case ARM::VST1LNdAsm_S8: case ARM::VST1LNdAsm_U8: case ARM::VST1LNdAsm_16:
+ case ARM::VST1LNdAsm_P16: case ARM::VST1LNdAsm_I16: case ARM::VST1LNdAsm_S16:
+ case ARM::VST1LNdAsm_U16: case ARM::VST1LNdAsm_32: case ARM::VST1LNdAsm_F:
+ case ARM::VST1LNdAsm_F32: case ARM::VST1LNdAsm_I32: case ARM::VST1LNdAsm_S32:
+ case ARM::VST1LNdAsm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST2LNdAsm_8: case ARM::VST2LNdAsm_P8: case ARM::VST2LNdAsm_I8:
+ case ARM::VST2LNdAsm_S8: case ARM::VST2LNdAsm_U8: case ARM::VST2LNdAsm_16:
+ case ARM::VST2LNdAsm_P16: case ARM::VST2LNdAsm_I16: case ARM::VST2LNdAsm_S16:
+ case ARM::VST2LNdAsm_U16: case ARM::VST2LNdAsm_32: case ARM::VST2LNdAsm_F:
+ case ARM::VST2LNdAsm_F32: case ARM::VST2LNdAsm_I32: case ARM::VST2LNdAsm_S32:
+ case ARM::VST2LNdAsm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+ // Handle NEON VLD complex aliases.
+ case ARM::VLD1LNdWB_register_Asm_8: case ARM::VLD1LNdWB_register_Asm_P8:
+ case ARM::VLD1LNdWB_register_Asm_I8: case ARM::VLD1LNdWB_register_Asm_S8:
+ case ARM::VLD1LNdWB_register_Asm_U8: case ARM::VLD1LNdWB_register_Asm_16:
+ case ARM::VLD1LNdWB_register_Asm_P16: case ARM::VLD1LNdWB_register_Asm_I16:
+ case ARM::VLD1LNdWB_register_Asm_S16: case ARM::VLD1LNdWB_register_Asm_U16:
+ case ARM::VLD1LNdWB_register_Asm_32: case ARM::VLD1LNdWB_register_Asm_F:
+ case ARM::VLD1LNdWB_register_Asm_F32: case ARM::VLD1LNdWB_register_Asm_I32:
+ case ARM::VLD1LNdWB_register_Asm_S32: case ARM::VLD1LNdWB_register_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD2LNdWB_register_Asm_8: case ARM::VLD2LNdWB_register_Asm_P8:
+ case ARM::VLD2LNdWB_register_Asm_I8: case ARM::VLD2LNdWB_register_Asm_S8:
+ case ARM::VLD2LNdWB_register_Asm_U8: case ARM::VLD2LNdWB_register_Asm_16:
+ case ARM::VLD2LNdWB_register_Asm_P16: case ARM::VLD2LNdWB_register_Asm_I16:
+ case ARM::VLD2LNdWB_register_Asm_S16: case ARM::VLD2LNdWB_register_Asm_U16:
+ case ARM::VLD2LNdWB_register_Asm_32: case ARM::VLD2LNdWB_register_Asm_F:
+ case ARM::VLD2LNdWB_register_Asm_F32: case ARM::VLD2LNdWB_register_Asm_I32:
+ case ARM::VLD2LNdWB_register_Asm_S32: case ARM::VLD2LNdWB_register_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD1LNdWB_fixed_Asm_8: case ARM::VLD1LNdWB_fixed_Asm_P8:
+ case ARM::VLD1LNdWB_fixed_Asm_I8: case ARM::VLD1LNdWB_fixed_Asm_S8:
+ case ARM::VLD1LNdWB_fixed_Asm_U8: case ARM::VLD1LNdWB_fixed_Asm_16:
+ case ARM::VLD1LNdWB_fixed_Asm_P16: case ARM::VLD1LNdWB_fixed_Asm_I16:
+ case ARM::VLD1LNdWB_fixed_Asm_S16: case ARM::VLD1LNdWB_fixed_Asm_U16:
+ case ARM::VLD1LNdWB_fixed_Asm_32: case ARM::VLD1LNdWB_fixed_Asm_F:
+ case ARM::VLD1LNdWB_fixed_Asm_F32: case ARM::VLD1LNdWB_fixed_Asm_I32:
+ case ARM::VLD1LNdWB_fixed_Asm_S32: case ARM::VLD1LNdWB_fixed_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD2LNdWB_fixed_Asm_8: case ARM::VLD2LNdWB_fixed_Asm_P8:
+ case ARM::VLD2LNdWB_fixed_Asm_I8: case ARM::VLD2LNdWB_fixed_Asm_S8:
+ case ARM::VLD2LNdWB_fixed_Asm_U8: case ARM::VLD2LNdWB_fixed_Asm_16:
+ case ARM::VLD2LNdWB_fixed_Asm_P16: case ARM::VLD2LNdWB_fixed_Asm_I16:
+ case ARM::VLD2LNdWB_fixed_Asm_S16: case ARM::VLD2LNdWB_fixed_Asm_U16:
+ case ARM::VLD2LNdWB_fixed_Asm_32: case ARM::VLD2LNdWB_fixed_Asm_F:
+ case ARM::VLD2LNdWB_fixed_Asm_F32: case ARM::VLD2LNdWB_fixed_Asm_I32:
+ case ARM::VLD2LNdWB_fixed_Asm_S32: case ARM::VLD2LNdWB_fixed_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD1LNdAsm_8: case ARM::VLD1LNdAsm_P8: case ARM::VLD1LNdAsm_I8:
+ case ARM::VLD1LNdAsm_S8: case ARM::VLD1LNdAsm_U8: case ARM::VLD1LNdAsm_16:
+ case ARM::VLD1LNdAsm_P16: case ARM::VLD1LNdAsm_I16: case ARM::VLD1LNdAsm_S16:
+ case ARM::VLD1LNdAsm_U16: case ARM::VLD1LNdAsm_32: case ARM::VLD1LNdAsm_F:
+ case ARM::VLD1LNdAsm_F32: case ARM::VLD1LNdAsm_I32: case ARM::VLD1LNdAsm_S32:
+ case ARM::VLD1LNdAsm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD2LNdAsm_8: case ARM::VLD2LNdAsm_P8: case ARM::VLD2LNdAsm_I8:
+ case ARM::VLD2LNdAsm_S8: case ARM::VLD2LNdAsm_U8: case ARM::VLD2LNdAsm_16:
+ case ARM::VLD2LNdAsm_P16: case ARM::VLD2LNdAsm_I16: case ARM::VLD2LNdAsm_S16:
+ case ARM::VLD2LNdAsm_U16: case ARM::VLD2LNdAsm_32: case ARM::VLD2LNdAsm_F:
+ case ARM::VLD2LNdAsm_F32: case ARM::VLD2LNdAsm_I32: case ARM::VLD2LNdAsm_S32:
+ case ARM::VLD2LNdAsm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+ // Handle the Thumb2 mode MOV complex aliases.
+ case ARM::t2MOVsi:
+ case ARM::t2MOVSsi: {
+ // Which instruction to expand to depends on the CCOut operand, on
+ // whether we're in an IT block, and on whether both register operands
+ // are low registers (only then is a narrow encoding possible).
+ bool isNarrow = false;
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ inITBlock() == (Inst.getOpcode() == ARM::t2MOVsi))
+ isNarrow = true;
+ MCInst TmpInst;
+ unsigned newOpc;
+ switch(ARM_AM::getSORegShOp(Inst.getOperand(2).getImm())) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM_AM::asr: newOpc = isNarrow ? ARM::tASRri : ARM::t2ASRri; break;
+ case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRri : ARM::t2LSRri; break;
+ case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLri : ARM::t2LSLri; break;
+ case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break;
+ }
+ unsigned Amount = ARM_AM::getSORegOffset(Inst.getOperand(2).getImm());
+ if (Amount == 32) Amount = 0;
+ TmpInst.setOpcode(newOpc);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ if (isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(MCOperand::CreateImm(Amount));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ if (!isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0));
+ Inst = TmpInst;
+ return true;
+ }
+ // Handle the ARM mode MOV complex aliases.
case ARM::ASRr:
case ARM::LSRr:
case ARM::LSLr:
@@ -4743,6 +5656,24 @@ processInstruction(MCInst &Inst,
Inst = TmpInst;
}
break;
+ case ARM::t2ADDri12:
+ // If the immediate fits for encoding T3 (t2ADDri) and the generic "add"
+ // mnemonic was used (not "addw"), encoding T3 is preferred.
+ if (static_cast<ARMOperand*>(Operands[0])->getToken() != "add" ||
+ ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
+ break;
+ Inst.setOpcode(ARM::t2ADDri);
+ Inst.addOperand(MCOperand::CreateReg(0)); // cc_out
+ break;
+ case ARM::t2SUBri12:
+ // If the immediate fits for encoding T3 (t2SUBri) and the generic "sub"
+ // mnemonic was used (not "subw"), encoding T3 is preferred.
+ if (static_cast<ARMOperand*>(Operands[0])->getToken() != "sub" ||
+ ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
+ break;
+ Inst.setOpcode(ARM::t2SUBri);
+ Inst.addOperand(MCOperand::CreateReg(0)); // cc_out
+ break;
case ARM::tADDi8:
// If the immediate is in the range 0-7, we want tADDi3 iff Rd was
// explicitly specified. From the ARM ARM: "Encoding T1 is preferred
@@ -4763,6 +5694,26 @@ processInstruction(MCInst &Inst,
return true;
}
break;
+ case ARM::t2ADDrr: {
+ // If the destination and first source operand are the same, and
+ // there's no setting of the flags, use encoding T2 instead of T3.
+ // Note that this is only for ADD, not SUB. This mirrors the system
+ // 'as' behaviour. Make sure the wide encoding wasn't explicit.
+ if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() ||
+ Inst.getOperand(5).getReg() != 0 ||
+ (static_cast<ARMOperand*>(Operands[3])->isToken() &&
+ static_cast<ARMOperand*>(Operands[3])->getToken() == ".w"))
+ break;
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tADDhirr);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
case ARM::tB:
// A Thumb conditional branch outside of an IT block is a tBcc.
if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()) {
@@ -5079,12 +6030,16 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveWord(4, DirectiveID.getLoc());
else if (IDVal == ".thumb")
return parseDirectiveThumb(DirectiveID.getLoc());
+ else if (IDVal == ".arm")
+ return parseDirectiveARM(DirectiveID.getLoc());
else if (IDVal == ".thumb_func")
return parseDirectiveThumbFunc(DirectiveID.getLoc());
else if (IDVal == ".code")
return parseDirectiveCode(DirectiveID.getLoc());
else if (IDVal == ".syntax")
return parseDirectiveSyntax(DirectiveID.getLoc());
+ else if (IDVal == ".unreq")
+ return parseDirectiveUnreq(DirectiveID.getLoc());
return true;
}
@@ -5120,9 +6075,22 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
return Error(L, "unexpected token in directive");
Parser.Lex();
- // TODO: set thumb mode
- // TODO: tell the MC streamer the mode
- // getParser().getStreamer().Emit???();
+ if (!isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
+ return false;
+}
+
+/// parseDirectiveARM
+/// ::= .arm
+bool ARMAsmParser::parseDirectiveARM(SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+
+ if (isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
return false;
}
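With the .thumb handler filled in and the new .arm handler above, both directives now switch the parser's ISA mode and emit the matching MCAF_Code16/MCAF_Code32 assembler flag. A minimal usage sketch (the instructions chosen are arbitrary):

    .arm                   @ assemble what follows as ARM code
    add   r0, r1, r2
    .thumb                 @ switch back to Thumb encoding
    adds  r0, r0, #1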
@@ -5212,6 +6180,45 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
return false;
}
+/// parseDirectiveReq
+/// ::= name .req registername
+bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
+ Parser.Lex(); // Eat the '.req' token.
+ unsigned Reg;
+ SMLoc SRegLoc, ERegLoc;
+ if (ParseRegister(Reg, SRegLoc, ERegLoc)) {
+ Parser.EatToEndOfStatement();
+ return Error(SRegLoc, "register name expected");
+ }
+
+ // Shouldn't be anything else.
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
+ Parser.EatToEndOfStatement();
+ return Error(Parser.getTok().getLoc(),
+ "unexpected input in .req directive.");
+ }
+
+ Parser.Lex(); // Consume the EndOfStatement
+
+ if (RegisterReqs.GetOrCreateValue(Name, Reg).getValue() != Reg)
+ return Error(SRegLoc, "redefinition of '" + Name +
+ "' does not match original.");
+
+ return false;
+}
+
+/// parseDirectiveUnreq
+/// ::= .unreq registername
+bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) {
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Parser.EatToEndOfStatement();
+ return Error(L, "unexpected input in .unreq directive.");
+ }
+ RegisterReqs.erase(Parser.getTok().getIdentifier());
+ Parser.Lex(); // Eat the identifier.
+ return false;
+}
+
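A short sketch of the .req/.unreq behaviour the two handlers above implement; the alias name 'count' is illustrative, and note that a redefinition is only rejected when it names a different register:

    count   .req r4        @ define 'count' as an alias for r4
            mov  count, #0
    count   .req r4        @ matching redefinition: accepted
            .unreq count   @ remove the alias again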
extern "C" void LLVMInitializeARMAsmLexer();
/// Force static initialization.
diff --git a/lib/Target/ARM/AsmParser/CMakeLists.txt b/lib/Target/ARM/AsmParser/CMakeLists.txt
index 3f5ad39..e24a1b1 100644
--- a/lib/Target/ARM/AsmParser/CMakeLists.txt
+++ b/lib/Target/ARM/AsmParser/CMakeLists.txt
@@ -6,11 +6,3 @@ add_llvm_library(LLVMARMAsmParser
)
add_dependencies(LLVMARMAsmParser ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMAsmParser
- LLVMARMDesc
- LLVMARMInfo
- LLVMMC
- LLVMMCParser
- LLVMSupport
- )
diff --git a/lib/Target/ARM/AsmParser/LLVMBuild.txt b/lib/Target/ARM/AsmParser/LLVMBuild.txt
index cbf9b4b..f0184b6 100644
--- a/lib/Target/ARM/AsmParser/LLVMBuild.txt
+++ b/lib/Target/ARM/AsmParser/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = ARMAsmParser
parent = ARM
required_libraries = ARMDesc ARMInfo MC MCParser Support
add_to_library_groups = ARM
-
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 511932e..04cdf55 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -48,20 +48,6 @@ add_llvm_target(ARMCodeGen
Thumb2SizeReduction.cpp
)
-add_llvm_library_dependencies(LLVMARMCodeGen
- LLVMARMAsmPrinter
- LLVMARMDesc
- LLVMARMInfo
- LLVMAnalysis
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
# workaround for hanging compilation on MSVC9, 10
if( MSVC_VERSION EQUAL 1600 OR MSVC_VERSION EQUAL 1500 )
set_property(
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index ad250ab..49c64fd 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -2085,15 +2085,24 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
case ARM::VLD1d32Qwb_register:
case ARM::VLD1d64Qwb_fixed:
case ARM::VLD1d64Qwb_register:
- case ARM::VLD2d8_UPD:
- case ARM::VLD2d16_UPD:
- case ARM::VLD2d32_UPD:
- case ARM::VLD2q8_UPD:
- case ARM::VLD2q16_UPD:
- case ARM::VLD2q32_UPD:
- case ARM::VLD2b8_UPD:
- case ARM::VLD2b16_UPD:
- case ARM::VLD2b32_UPD:
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2q8wb_fixed:
+ case ARM::VLD2q16wb_fixed:
+ case ARM::VLD2q32wb_fixed:
+ case ARM::VLD2d8wb_register:
+ case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_register:
+ case ARM::VLD2q8wb_register:
+ case ARM::VLD2q16wb_register:
+ case ARM::VLD2q32wb_register:
+ case ARM::VLD2b8wb_fixed:
+ case ARM::VLD2b16wb_fixed:
+ case ARM::VLD2b32wb_fixed:
+ case ARM::VLD2b8wb_register:
+ case ARM::VLD2b16wb_register:
+ case ARM::VLD2b32wb_register:
case ARM::VLD3d8_UPD:
case ARM::VLD3d16_UPD:
case ARM::VLD3d32_UPD:
@@ -2196,23 +2205,40 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
case ARM::VST1q16wb_register:
case ARM::VST1q32wb_register:
case ARM::VST1q64wb_register:
- case ARM::VST1d8T_UPD:
- case ARM::VST1d16T_UPD:
- case ARM::VST1d32T_UPD:
- case ARM::VST1d64T_UPD:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2d8_UPD:
- case ARM::VST2d16_UPD:
- case ARM::VST2d32_UPD:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
- case ARM::VST2b8_UPD:
- case ARM::VST2b16_UPD:
- case ARM::VST2b32_UPD:
+ case ARM::VST1d8Twb_fixed:
+ case ARM::VST1d16Twb_fixed:
+ case ARM::VST1d32Twb_fixed:
+ case ARM::VST1d64Twb_fixed:
+ case ARM::VST1d8Twb_register:
+ case ARM::VST1d16Twb_register:
+ case ARM::VST1d32Twb_register:
+ case ARM::VST1d64Twb_register:
+ case ARM::VST1d8Qwb_fixed:
+ case ARM::VST1d16Qwb_fixed:
+ case ARM::VST1d32Qwb_fixed:
+ case ARM::VST1d64Qwb_fixed:
+ case ARM::VST1d8Qwb_register:
+ case ARM::VST1d16Qwb_register:
+ case ARM::VST1d32Qwb_register:
+ case ARM::VST1d64Qwb_register:
+ case ARM::VST2d8wb_fixed:
+ case ARM::VST2d16wb_fixed:
+ case ARM::VST2d32wb_fixed:
+ case ARM::VST2d8wb_register:
+ case ARM::VST2d16wb_register:
+ case ARM::VST2d32wb_register:
+ case ARM::VST2q8wb_fixed:
+ case ARM::VST2q16wb_fixed:
+ case ARM::VST2q32wb_fixed:
+ case ARM::VST2q8wb_register:
+ case ARM::VST2q16wb_register:
+ case ARM::VST2q32wb_register:
+ case ARM::VST2b8wb_fixed:
+ case ARM::VST2b16wb_fixed:
+ case ARM::VST2b32wb_fixed:
+ case ARM::VST2b8wb_register:
+ case ARM::VST2b16wb_register:
+ case ARM::VST2b32wb_register:
case ARM::VST3d8_UPD:
case ARM::VST3d16_UPD:
case ARM::VST3d32_UPD:
@@ -2264,34 +2290,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
// Second input register
switch (Inst.getOpcode()) {
- case ARM::VST1d8T:
- case ARM::VST1d16T:
- case ARM::VST1d32T:
- case ARM::VST1d64T:
- case ARM::VST1d8T_UPD:
- case ARM::VST1d16T_UPD:
- case ARM::VST1d32T_UPD:
- case ARM::VST1d64T_UPD:
- case ARM::VST1d8Q:
- case ARM::VST1d16Q:
- case ARM::VST1d32Q:
- case ARM::VST1d64Q:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2d8:
- case ARM::VST2d16:
- case ARM::VST2d32:
- case ARM::VST2d8_UPD:
- case ARM::VST2d16_UPD:
- case ARM::VST2d32_UPD:
- case ARM::VST2q8:
- case ARM::VST2q16:
- case ARM::VST2q32:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
case ARM::VST3d8:
case ARM::VST3d16:
case ARM::VST3d32:
@@ -2307,12 +2305,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
return MCDisassembler::Fail;
break;
- case ARM::VST2b8:
- case ARM::VST2b16:
- case ARM::VST2b32:
- case ARM::VST2b8_UPD:
- case ARM::VST2b16_UPD:
- case ARM::VST2b32_UPD:
case ARM::VST3q8:
case ARM::VST3q16:
case ARM::VST3q32:
@@ -2334,28 +2326,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
// Third input register
switch (Inst.getOpcode()) {
- case ARM::VST1d8T:
- case ARM::VST1d16T:
- case ARM::VST1d32T:
- case ARM::VST1d64T:
- case ARM::VST1d8T_UPD:
- case ARM::VST1d16T_UPD:
- case ARM::VST1d32T_UPD:
- case ARM::VST1d64T_UPD:
- case ARM::VST1d8Q:
- case ARM::VST1d16Q:
- case ARM::VST1d32Q:
- case ARM::VST1d64Q:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2q8:
- case ARM::VST2q16:
- case ARM::VST2q32:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
case ARM::VST3d8:
case ARM::VST3d16:
case ARM::VST3d32:
@@ -2392,20 +2362,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
// Fourth input register
switch (Inst.getOpcode()) {
- case ARM::VST1d8Q:
- case ARM::VST1d16Q:
- case ARM::VST1d32Q:
- case ARM::VST1d64Q:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2q8:
- case ARM::VST2q16:
- case ARM::VST2q32:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
case ARM::VST4d8:
case ARM::VST4d16:
case ARM::VST4d32:
@@ -2441,16 +2397,11 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn,
unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
unsigned align = fieldFromInstruction32(Insn, 4, 1);
unsigned size = fieldFromInstruction32(Insn, 6, 2);
- unsigned regs = fieldFromInstruction32(Insn, 5, 1) + 1;
align *= (1 << size);
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
- if (regs == 2) {
- if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
- return MCDisassembler::Fail;
- }
if (Rm != 0xF) {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2460,12 +2411,12 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::CreateImm(align));
- if (Rm == 0xD)
- Inst.addOperand(MCOperand::CreateReg(0));
- else if (Rm != 0xF) {
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
- return MCDisassembler::Fail;
- }
+ // The fixed offset post-increment encodes Rm == 0xd. The no-writeback
+ // variant encodes Rm == 0xf. Anything else is a register offset post-
+ // increment and we need to add the register operand to the instruction.
+ if (Rm != 0xD && Rm != 0xF &&
+ !Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
return S;
}
@@ -2693,7 +2644,6 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
unsigned op = fieldFromInstruction32(Insn, 6, 1);
- unsigned length = fieldFromInstruction32(Insn, 8, 2) + 1;
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2702,10 +2652,8 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail; // Writeback
}
- for (unsigned i = 0; i < length; ++i) {
- if (!Check(S, DecodeDPRRegisterClass(Inst, (Rn+i)%32, Address, Decoder)))
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
- }
if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
@@ -4138,4 +4086,3 @@ static DecodeStatus DecodeVCVTQ(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-
diff --git a/lib/Target/ARM/Disassembler/CMakeLists.txt b/lib/Target/ARM/Disassembler/CMakeLists.txt
index da87751..9de6e5c 100644
--- a/lib/Target/ARM/Disassembler/CMakeLists.txt
+++ b/lib/Target/ARM/Disassembler/CMakeLists.txt
@@ -11,11 +11,3 @@ set_property(
)
endif()
add_dependencies(LLVMARMDisassembler ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMDisassembler
- LLVMARMCodeGen
- LLVMARMDesc
- LLVMARMInfo
- LLVMMC
- LLVMSupport
- )
diff --git a/lib/Target/ARM/Disassembler/LLVMBuild.txt b/lib/Target/ARM/Disassembler/LLVMBuild.txt
index baa9bc3..94075a9 100644
--- a/lib/Target/ARM/Disassembler/LLVMBuild.txt
+++ b/lib/Target/ARM/Disassembler/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = ARMDisassembler
parent = ARM
required_libraries = ARMCodeGen ARMDesc ARMInfo MC Support
add_to_library_groups = ARM
-
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 6c6c021..662097a 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -1029,3 +1029,29 @@ void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum,
<< getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", "
<< getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "}";
}
+
+void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "}";
+}
+
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 3f38f1a..05db2d2 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -133,6 +133,12 @@ public:
void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
};
} // end namespace llvm
diff --git a/lib/Target/ARM/InstPrinter/CMakeLists.txt b/lib/Target/ARM/InstPrinter/CMakeLists.txt
index fa0b495..e2d4819 100644
--- a/lib/Target/ARM/InstPrinter/CMakeLists.txt
+++ b/lib/Target/ARM/InstPrinter/CMakeLists.txt
@@ -5,8 +5,3 @@ add_llvm_library(LLVMARMAsmPrinter
)
add_dependencies(LLVMARMAsmPrinter ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMAsmPrinter
- LLVMMC
- LLVMSupport
- )
diff --git a/lib/Target/ARM/InstPrinter/LLVMBuild.txt b/lib/Target/ARM/InstPrinter/LLVMBuild.txt
index b34aab4..6f4fa36 100644
--- a/lib/Target/ARM/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/ARM/InstPrinter/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = ARMAsmPrinter
parent = ARM
required_libraries = MC Support
add_to_library_groups = ARM
-
diff --git a/lib/Target/ARM/LLVMBuild.txt b/lib/Target/ARM/LLVMBuild.txt
index 9082539..fd4b3a3 100644
--- a/lib/Target/ARM/LLVMBuild.txt
+++ b/lib/Target/ARM/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = ARM
@@ -30,4 +33,3 @@ name = ARMCodeGen
parent = ARM
required_libraries = ARMAsmPrinter ARMDesc ARMInfo Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target
add_to_library_groups = ARM
-
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 62d04c4..bf1f0e8 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -102,6 +102,11 @@ public:
bool MayNeedRelaxation(const MCInst &Inst) const;
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const;
+
void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
@@ -124,14 +129,49 @@ public:
};
} // end anonymous namespace
+static unsigned getRelaxedOpcode(unsigned Op) {
+ switch (Op) {
+ default: return Op;
+ case ARM::tBcc: return ARM::t2Bcc;
+ }
+}
+
bool ARMAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
- // FIXME: Thumb targets, different move constant targets..
+ if (getRelaxedOpcode(Inst.getOpcode()) != Inst.getOpcode())
+ return true;
return false;
}
+bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // Relaxing tBcc to t2Bcc. tBcc has a signed 9-bit displacement with the
+ // low bit being an implied zero. There's an implied +4 offset for the
+ // branch, so we adjust the other way here to determine what's
+ // encodable.
+ //
+ // Relax if the adjusted offset doesn't fit tBcc's signed, halfword-aligned
+ // 9-bit displacement (-256..+254 bytes).
+ int64_t Offset = int64_t(Value) - 4;
+ return Offset > 254 || Offset < -256;
+}
+
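In other words, a Thumb conditional branch keeps its 16-bit encoding only while the target stays within tBcc's reach; otherwise RelaxInstruction below rewrites it to t2Bcc. A rough illustration (the label and distances are hypothetical):

    cmp  r0, #0
    beq  .Ltarget          @ stays a 16-bit Bcc while .Ltarget is within about
                           @ -256..+254 bytes of PC+4; relaxed to the 32-bit
                           @ Thumb-2 Bcc once the target is further away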
void ARMAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
- assert(0 && "ARMAsmBackend::RelaxInstruction() unimplemented");
- return;
+ unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode());
+
+ // Sanity check w/ diagnostic if we get here w/ a bogus instruction.
+ if (RelaxedOp == Inst.getOpcode()) {
+ SmallString<256> Tmp;
+ raw_svector_ostream OS(Tmp);
+ Inst.dump_pretty(OS);
+ OS << "\n";
+ report_fatal_error("unexpected instruction to relax: " + OS.str());
+ }
+
+ // The instructions we're relaxing have (so far) the same operands.
+ // We just need to update to the proper opcode.
+ Res = Inst;
+ Res.setOpcode(RelaxedOp);
}
bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 865c3e2..c38a882 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -1412,7 +1412,7 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand &MO = MI.getOperand(Op);
if (MO.getReg() == 0) return 0x0D;
- return MO.getReg();
+ return getARMRegisterNumbering(MO.getReg());
}
unsigned ARMMCCodeEmitter::
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 352c73e..f394b4f 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -16,6 +16,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCMachOSymbolFlags.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ErrorHandling.h"
@@ -178,9 +179,16 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
case ARM::fixup_arm_movt_hi16:
case ARM::fixup_arm_movt_hi16_pcrel:
MovtBit = 1;
+ // The thumb bit shouldn't be set in the 'other-half' bit of the
+ // relocation, but it will be set in FixedValue if the base symbol
+ // is a thumb function. Clear it out here.
+ if (A_SD->getFlags() & SF_ThumbFunc)
+ FixedValue &= 0xfffffffe;
break;
case ARM::fixup_t2_movt_hi16:
case ARM::fixup_t2_movt_hi16_pcrel:
+ if (A_SD->getFlags() & SF_ThumbFunc)
+ FixedValue &= 0xfffffffe;
MovtBit = 1;
// Fallthrough
case ARM::fixup_t2_movw_lo16:
@@ -189,7 +197,6 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
break;
}
-
if (Type == macho::RIT_ARM_HalfDifference) {
uint32_t OtherHalf = MovtBit
? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16);
diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
index f529314..f2cf78a 100644
--- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
@@ -10,10 +10,3 @@ add_dependencies(LLVMARMDesc ARMCommonTableGen)
# Hack: we need to include 'main' target directory to grab private headers
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
-
-add_llvm_library_dependencies(LLVMARMDesc
- LLVMARMAsmPrinter
- LLVMARMInfo
- LLVMMC
- LLVMSupport
- )
diff --git a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt
index 46b11c7..2a7fe61 100644
--- a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = ARMDesc
parent = ARM
required_libraries = ARMAsmPrinter ARMInfo MC Support
add_to_library_groups = ARM
-
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index 2df0053..000a37f 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -139,7 +139,7 @@ bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
// FIXME: Detect integer instructions properly.
const MCInstrDesc &MCID = MI->getDesc();
unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
- if (MCID.mayStore())
+ if (MI->mayStore())
return false;
unsigned Opcode = MCID.getOpcode();
if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
@@ -222,14 +222,14 @@ MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
const MCInstrDesc &MCID2 = TII->get(AddSubOpc);
unsigned TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI));
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID1, TmpReg)
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg)
.addReg(Src1Reg, getKillRegState(Src1Kill))
.addReg(Src2Reg, getKillRegState(Src2Kill));
if (HasLane)
MIB.addImm(LaneImm);
MIB.addImm(Pred).addReg(PredReg);
- MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID2)
+ MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID2)
.addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead));
if (NegAcc) {
@@ -274,7 +274,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
}
const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.isBarrier()) {
+ if (MI->isBarrier()) {
clearStack();
Skip = 0;
++MII;
diff --git a/lib/Target/ARM/TargetInfo/CMakeLists.txt b/lib/Target/ARM/TargetInfo/CMakeLists.txt
index 8b38b13..533e747 100644
--- a/lib/Target/ARM/TargetInfo/CMakeLists.txt
+++ b/lib/Target/ARM/TargetInfo/CMakeLists.txt
@@ -5,9 +5,3 @@ add_llvm_library(LLVMARMInfo
)
add_dependencies(LLVMARMInfo ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
diff --git a/lib/Target/ARM/TargetInfo/LLVMBuild.txt b/lib/Target/ARM/TargetInfo/LLVMBuild.txt
index 046c1fc..a07a940 100644
--- a/lib/Target/ARM/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/ARM/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = ARMInfo
parent = ARM
required_libraries = MC Support Target
add_to_library_groups = ARM
-
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index e8ed482..e61c0a7 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -643,14 +643,13 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
assert(Offset && "This code isn't needed if offset already handled!");
unsigned Opcode = MI.getOpcode();
- const MCInstrDesc &Desc = MI.getDesc();
// Remove predicate first.
int PIdx = MI.findFirstPredOperandIdx();
if (PIdx != -1)
removeOperands(MI, PIdx);
- if (Desc.mayLoad()) {
+ if (MI.mayLoad()) {
// Use the destination register to materialize sp + offset.
unsigned TmpReg = MI.getOperand(0).getReg();
bool UseRR = false;
@@ -673,7 +672,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
// register. The offset is already handled in the vreg value.
MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
- } else if (Desc.mayStore()) {
+ } else if (MI.mayStore()) {
VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
bool UseRR = false;
@@ -699,7 +698,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
// Add predicate back if it's needed.
- if (MI.getDesc().isPredicable()) {
+ if (MI.isPredicable()) {
MachineInstrBuilder MIB(&MI);
AddDefaultPred(MIB);
}
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index b627400..55b4d30 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -13,6 +13,7 @@
#include "Thumb2InstrInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
@@ -141,7 +142,7 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
// rsb r2, 0
//
const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.hasOptionalDef() &&
+ if (MI->hasOptionalDef() &&
MI->getOperand(MCID.getNumOperands() - 1).getReg() == ARM::CPSR)
return false;
@@ -198,7 +199,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
// Branches, including tricky ones like LDM_RET, need to end an IT
// block so check the instruction we just put in the block.
for (; MBBI != E && Pos &&
- (!MI->getDesc().isBranch() && !MI->getDesc().isReturn()) ; ++MBBI) {
+ (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) {
if (MBBI->isDebugValue())
continue;
@@ -237,6 +238,9 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
// Last instruction in IT block kills ITSTATE.
LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();
+ // Finalize the bundle.
+ FinalizeBundle(MBB, InsertPos.getInstrIterator(), LastITMI);
+
Modified = true;
++NumITs;
}
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index e5fc8b4..e206288 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -452,7 +452,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit load / store instruction.
DebugLoc dl = MI->getDebugLoc();
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc));
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));
if (!isLdStMul) {
MIB.addOperand(MI->getOperand(0));
MIB.addOperand(MI->getOperand(1));
@@ -478,7 +478,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
- MBB.erase(MI);
+ MBB.erase_instr(MI);
++NumLdSts;
return true;
}
@@ -513,7 +513,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
return false;
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(),
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
TII->get(ARM::tADDrSPi))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
@@ -525,7 +525,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " <<*MIB);
- MBB.erase(MI);
+ MBB.erase_instr(MI);
++NumNarrows;
return true;
}
@@ -533,8 +533,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
if (Entry.LowRegs1 && !VerifyLowRegs(MI))
return false;
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.mayLoad() || MCID.mayStore())
+ if (MI->mayLoad() || MI->mayStore())
return ReduceLoadStore(MBB, MI, Entry);
switch (Opc) {
@@ -654,7 +653,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc();
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
MIB.addOperand(MI->getOperand(0));
if (NewMCID.hasOptionalDef()) {
if (HasCC)
@@ -678,7 +677,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
- MBB.erase(MI);
+ MBB.erase_instr(MI);
++Num2Addrs;
return true;
}
@@ -745,7 +744,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc();
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
MIB.addOperand(MI->getOperand(0));
if (NewMCID.hasOptionalDef()) {
if (HasCC)
@@ -785,7 +784,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
- MBB.erase(MI);
+ MBB.erase_instr(MI);
++NumNarrows;
return true;
}
@@ -830,16 +829,22 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
// Yes, CPSR could be livein.
bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
MachineInstr *CPSRDef = 0;
+ MachineInstr *BundleMI = 0;
// If this BB loops back to itself, conservatively avoid narrowing the
// first instruction that does partial flag update.
bool IsSelfLoop = MBB.isSuccessor(&MBB);
- MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
- MachineBasicBlock::iterator NextMII;
+ MachineBasicBlock::instr_iterator MII = MBB.instr_begin(), E = MBB.instr_end();
+ MachineBasicBlock::instr_iterator NextMII;
for (; MII != E; MII = NextMII) {
NextMII = llvm::next(MII);
MachineInstr *MI = &*MII;
+ if (MI->isBundle()) {
+ BundleMI = MI;
+ continue;
+ }
+
LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
unsigned Opcode = MI->getOpcode();
@@ -850,7 +855,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
if (Entry.Special) {
if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
Modified = true;
- MachineBasicBlock::iterator I = prior(NextMII);
+ MachineBasicBlock::instr_iterator I = prior(NextMII);
MI = &*I;
}
goto ProcessNext;
@@ -860,7 +865,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
if (Entry.NarrowOpc2 &&
ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
Modified = true;
- MachineBasicBlock::iterator I = prior(NextMII);
+ MachineBasicBlock::instr_iterator I = prior(NextMII);
MI = &*I;
goto ProcessNext;
}
@@ -869,15 +874,24 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
if (Entry.NarrowOpc1 &&
ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
Modified = true;
- MachineBasicBlock::iterator I = prior(NextMII);
+ MachineBasicBlock::instr_iterator I = prior(NextMII);
MI = &*I;
}
}
ProcessNext:
+ if (LiveCPSR &&
+ NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle() &&
+ BundleMI->killsRegister(ARM::CPSR))
+ // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
+ // marker is only on the BUNDLE instruction. Process the BUNDLE
+ // instruction as we finish with the bundled instruction to work around
+ // the inconsistency.
+ LiveCPSR = false;
+
bool DefCPSR = false;
LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
- if (MI->getDesc().isCall()) {
+ if (MI->isCall()) {
// Calls don't really set CPSR.
CPSRDef = 0;
IsSelfLoop = false;
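
Note on the hunk above: the pass now walks the block with the bundle-aware iterators (instr_begin()/instr_end()) so that instructions inside a BUNDLE are visited individually, and erase_instr() is used so that removing a bundled instruction keeps the bundle bookkeeping consistent. A minimal sketch of the bundle-aware walk, assuming the MachineBasicBlock/MachineInstr API used elsewhere in this patch; the helper name is made up and is not part of the patch:

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstr.h"
    using namespace llvm;

    // Count the instructions a block really contains, looking inside bundles.
    static unsigned countRealInstrs(MachineBasicBlock &MBB) {
      unsigned N = 0;
      for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(),
                                             E = MBB.instr_end(); I != E; ++I)
        if (!I->isBundle())  // Skip the BUNDLE marker instruction itself.
          ++N;
      return N;
    }
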
diff --git a/lib/Target/CBackend/CMakeLists.txt b/lib/Target/CBackend/CMakeLists.txt
index edf8ee7..fa819a4 100644
--- a/lib/Target/CBackend/CMakeLists.txt
+++ b/lib/Target/CBackend/CMakeLists.txt
@@ -2,16 +2,4 @@ add_llvm_target(CBackendCodeGen
CBackend.cpp
)
-add_llvm_library_dependencies(LLVMCBackendCodeGen
- LLVMAnalysis
- LLVMCBackendInfo
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMScalarOpts
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- )
-
add_subdirectory(TargetInfo)
diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h
index ca346af..8b2286e 100644
--- a/lib/Target/CBackend/CTargetMachine.h
+++ b/lib/Target/CBackend/CTargetMachine.h
@@ -21,10 +21,10 @@ namespace llvm {
struct CTargetMachine : public TargetMachine {
CTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : TargetMachine(T, TT, CPU, FS) {}
+ : TargetMachine(T, TT, CPU, FS, Options) { }
virtual bool addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &Out,
diff --git a/lib/Target/CBackend/LLVMBuild.txt b/lib/Target/CBackend/LLVMBuild.txt
index 851ded9..e64feb0 100644
--- a/lib/Target/CBackend/LLVMBuild.txt
+++ b/lib/Target/CBackend/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = TargetInfo
+
[component_0]
type = TargetGroup
name = CBackend
@@ -26,4 +29,3 @@ name = CBackendCodeGen
parent = CBackend
required_libraries = Analysis CBackendInfo CodeGen Core MC Scalar Support Target TransformUtils
add_to_library_groups = CBackend
-
diff --git a/lib/Target/CBackend/TargetInfo/CMakeLists.txt b/lib/Target/CBackend/TargetInfo/CMakeLists.txt
index 8e616be..6203616 100644
--- a/lib/Target/CBackend/TargetInfo/CMakeLists.txt
+++ b/lib/Target/CBackend/TargetInfo/CMakeLists.txt
@@ -3,9 +3,3 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
add_llvm_library(LLVMCBackendInfo
CBackendTargetInfo.cpp
)
-
-add_llvm_library_dependencies(LLVMCBackendInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
diff --git a/lib/Target/CBackend/TargetInfo/LLVMBuild.txt b/lib/Target/CBackend/TargetInfo/LLVMBuild.txt
index 35752b7..1b47d8e 100644
--- a/lib/Target/CBackend/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/CBackend/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = CBackendInfo
parent = CBackend
required_libraries = MC Support Target
add_to_library_groups = CBackend
-
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
index 60e2189..22d8c76 100644
--- a/lib/Target/CMakeLists.txt
+++ b/lib/Target/CMakeLists.txt
@@ -3,7 +3,6 @@ add_llvm_library(LLVMTarget
Target.cpp
TargetData.cpp
TargetELFWriterInfo.cpp
- TargetFrameLowering.cpp
TargetInstrInfo.cpp
TargetIntrinsicInfo.cpp
TargetLibraryInfo.cpp
@@ -13,12 +12,6 @@ add_llvm_library(LLVMTarget
TargetSubtargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMTarget
- LLVMCore
- LLVMMC
- LLVMSupport
- )
-
foreach(t ${LLVM_TARGETS_TO_BUILD})
message(STATUS "Targeting ${t}")
add_subdirectory(${t})
diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt
index b442a5c..6c67c2d 100644
--- a/lib/Target/CellSPU/CMakeLists.txt
+++ b/lib/Target/CellSPU/CMakeLists.txt
@@ -23,17 +23,5 @@ add_llvm_target(CellSPUCodeGen
SPUNopFiller.cpp
)
-add_llvm_library_dependencies(LLVMCellSPUCodeGen
- LLVMAsmPrinter
- LLVMCellSPUDesc
- LLVMCellSPUInfo
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/CellSPU/LLVMBuild.txt b/lib/Target/CellSPU/LLVMBuild.txt
index 4ae26b2..277620b 100644
--- a/lib/Target/CellSPU/LLVMBuild.txt
+++ b/lib/Target/CellSPU/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = CellSPU
@@ -27,4 +30,3 @@ name = CellSPUCodeGen
parent = CellSPU
required_libraries = AsmPrinter CellSPUDesc CellSPUInfo CodeGen Core MC SelectionDAG Support Target
add_to_library_groups = CellSPU
-
diff --git a/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt b/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
index d41fe93..0027bdb 100644
--- a/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
@@ -3,9 +3,4 @@ add_llvm_library(LLVMCellSPUDesc
SPUMCAsmInfo.cpp
)
-add_llvm_library_dependencies(LLVMCellSPUDesc
- LLVMCellSPUInfo
- LLVMMC
- )
-
add_dependencies(LLVMCellSPUDesc CellSPUCommonTableGen)
diff --git a/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt b/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt
index abc44a2..71e5bbc 100644
--- a/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = CellSPUDesc
parent = CellSPU
required_libraries = CellSPUInfo MC
add_to_library_groups = CellSPU
-
diff --git a/lib/Target/CellSPU/SPUFrameLowering.cpp b/lib/Target/CellSPU/SPUFrameLowering.cpp
index 093f99f..916f9ba 100644
--- a/lib/Target/CellSPU/SPUFrameLowering.cpp
+++ b/lib/Target/CellSPU/SPUFrameLowering.cpp
@@ -47,7 +47,8 @@ bool SPUFrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
return MFI->getStackSize() &&
- (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects());
+ (MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects());
}
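
Note: this hunk is part of the patch-wide TargetOptions change; codegen flags such as frame-pointer elimination are now queried from the TargetOptions held by the TargetMachine instead of from global state. A small sketch mirroring the code above (illustrative helper, not part of the patch):

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineFrameInfo.h"
    #include "llvm/Target/TargetMachine.h"
    #include "llvm/Target/TargetOptions.h"
    using namespace llvm;

    // Does this function need a frame pointer? Same shape as SPUFrameLowering::hasFP.
    static bool needsFramePointer(const MachineFunction &MF) {
      const MachineFrameInfo *MFI = MF.getFrameInfo();
      return MFI->getStackSize() &&
             (MF.getTarget().Options.DisableFramePointerElim(MF) ||
              MFI->hasVarSizedObjects());
    }
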
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index d58e49b..dc0d5a6 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -296,12 +296,22 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::CTTZ , MVT::i32, Expand);
setOperationAction(ISD::CTTZ , MVT::i64, Expand);
setOperationAction(ISD::CTTZ , MVT::i128, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i128, Expand);
setOperationAction(ISD::CTLZ , MVT::i8, Promote);
setOperationAction(ISD::CTLZ , MVT::i16, Promote);
setOperationAction(ISD::CTLZ , MVT::i32, Legal);
setOperationAction(ISD::CTLZ , MVT::i64, Expand);
setOperationAction(ISD::CTLZ , MVT::i128, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i128, Expand);
// SPU has a version of select that implements (a&~c)|(b&c), just like
// select ought to work:
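
Note on the *_ZERO_UNDEF lines added above: CTTZ_ZERO_UNDEF and CTLZ_ZERO_UNDEF are the count-zeros nodes whose result is undefined for a zero input, and marking them Expand lets the legalizer fall back to the plain CTTZ/CTLZ handling on SPU. A hedged source-level illustration (not taken from the patch):

    // __builtin_ctz is undefined for a zero argument, so front ends may emit
    // llvm.cttz with is_zero_undef = true, which selects to ISD::CTTZ_ZERO_UNDEF;
    // the setOperationAction calls above expand that node for SPU.
    unsigned trailingZeros(unsigned x) {
      return __builtin_ctz(x);  // Caller must guarantee x != 0.
    }
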
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index 6940316..1e922a4 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -34,9 +34,10 @@ SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS),
DataLayout(Subtarget.getTargetDataString()),
InstrInfo(*this),
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index 909f12e..0841fee 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -39,7 +39,7 @@ class SPUTargetMachine : public LLVMTargetMachine {
InstrItineraryData InstrItins;
public:
SPUTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
diff --git a/lib/Target/CellSPU/TargetInfo/CMakeLists.txt b/lib/Target/CellSPU/TargetInfo/CMakeLists.txt
index 3f2d6b09..6a98f95 100644
--- a/lib/Target/CellSPU/TargetInfo/CMakeLists.txt
+++ b/lib/Target/CellSPU/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMCellSPUInfo
CellSPUTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMCellSPUInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMCellSPUInfo CellSPUCommonTableGen)
diff --git a/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt b/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt
index 0710cc3..6937e70 100644
--- a/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = CellSPUInfo
parent = CellSPU
required_libraries = MC Support Target
add_to_library_groups = CellSPU
-
diff --git a/lib/Target/CppBackend/CMakeLists.txt b/lib/Target/CppBackend/CMakeLists.txt
index 53f6868..515e1dd 100644
--- a/lib/Target/CppBackend/CMakeLists.txt
+++ b/lib/Target/CppBackend/CMakeLists.txt
@@ -2,11 +2,4 @@ add_llvm_target(CppBackendCodeGen
CPPBackend.cpp
)
-add_llvm_library_dependencies(LLVMCppBackendCodeGen
- LLVMCore
- LLVMCppBackendInfo
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(TargetInfo)
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index a3613b4..92bca6c 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -23,10 +23,10 @@ class formatted_raw_ostream;
struct CPPTargetMachine : public TargetMachine {
CPPTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : TargetMachine(T, TT, CPU, FS) {}
+ : TargetMachine(T, TT, CPU, FS, Options) {}
virtual bool addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &Out,
diff --git a/lib/Target/CppBackend/LLVMBuild.txt b/lib/Target/CppBackend/LLVMBuild.txt
index 77e31c7..122b5e7 100644
--- a/lib/Target/CppBackend/LLVMBuild.txt
+++ b/lib/Target/CppBackend/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = TargetInfo
+
[component_0]
type = TargetGroup
name = CppBackend
@@ -26,4 +29,3 @@ name = CppBackendCodeGen
parent = CppBackend
required_libraries = Core CppBackendInfo Support Target
add_to_library_groups = CppBackend
-
diff --git a/lib/Target/CppBackend/TargetInfo/CMakeLists.txt b/lib/Target/CppBackend/TargetInfo/CMakeLists.txt
index 738b215..f82d72e 100644
--- a/lib/Target/CppBackend/TargetInfo/CMakeLists.txt
+++ b/lib/Target/CppBackend/TargetInfo/CMakeLists.txt
@@ -3,9 +3,3 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
add_llvm_library(LLVMCppBackendInfo
CppBackendTargetInfo.cpp
)
-
-add_llvm_library_dependencies(LLVMCppBackendInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
diff --git a/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt b/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt
index 67a23ba..d4dfc3e 100644
--- a/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = CppBackendInfo
parent = CppBackend
required_libraries = MC Support Target
add_to_library_groups = CppBackend
-
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
new file mode 100644
index 0000000..f8705ee
--- /dev/null
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -0,0 +1,35 @@
+set(LLVM_TARGET_DEFINITIONS Hexagon.td)
+
+tablegen(LLVM HexagonGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM HexagonGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM HexagonGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM HexagonGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM HexagonGenCallingConv.inc -gen-callingconv)
+tablegen(LLVM HexagonGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM HexagonGenIntrinsics.inc -gen-tgt-intrinsic)
+add_public_tablegen_target(HexagonCommonTableGen)
+
+add_llvm_target(HexagonCodeGen
+ HexagonAsmPrinter.cpp
+ HexagonCallingConvLower.cpp
+ HexagonCFGOptimizer.cpp
+ HexagonExpandPredSpillCode.cpp
+ HexagonFrameLowering.cpp
+ HexagonHardwareLoops.cpp
+ HexagonInstrInfo.cpp
+ HexagonISelDAGToDAG.cpp
+ HexagonISelLowering.cpp
+ HexagonMCAsmInfo.cpp
+ HexagonOptimizeSZExtends.cpp
+ HexagonRegisterInfo.cpp
+ HexagonRemoveSZExtArgs.cpp
+ HexagonSelectionDAGInfo.cpp
+ HexagonSplitTFRCondSets.cpp
+ HexagonSubtarget.cpp
+ HexagonTargetMachine.cpp
+ HexagonTargetObjectFile.cpp
+ )
+
+add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
+
diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h
new file mode 100644
index 0000000..a5f2279
--- /dev/null
+++ b/lib/Target/Hexagon/Hexagon.h
@@ -0,0 +1,54 @@
+//=-- Hexagon.h - Top-level interface for Hexagon representation --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Hexagon back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_Hexagon_H
+#define TARGET_Hexagon_H
+
+#include <cassert>
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+ class FunctionPass;
+ class TargetMachine;
+ class HexagonTargetMachine;
+ class raw_ostream;
+
+ FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM);
+ FunctionPass *createHexagonDelaySlotFillerPass(TargetMachine &TM);
+ FunctionPass *createHexagonFPMoverPass(TargetMachine &TM);
+ FunctionPass *createHexagonRemoveExtendOps(HexagonTargetMachine &TM);
+ FunctionPass *createHexagonCFGOptimizer(HexagonTargetMachine &TM);
+
+ FunctionPass* createHexagonSplitTFRCondSets(HexagonTargetMachine &TM);
+ FunctionPass* createHexagonExpandPredSpillCode(HexagonTargetMachine &TM);
+
+ FunctionPass *createHexagonHardwareLoops();
+ FunctionPass *createHexagonOptimizeSZExtends();
+ FunctionPass *createHexagonFixupHwLoops();
+
+} // end namespace llvm;
+
+#define Hexagon_POINTER_SIZE 4
+
+#define Hexagon_PointerSize (Hexagon_POINTER_SIZE)
+#define Hexagon_PointerSize_Bits (Hexagon_POINTER_SIZE * 8)
+#define Hexagon_WordSize Hexagon_PointerSize
+#define Hexagon_WordSize_Bits Hexagon_PointerSize_Bits
+
+// allocframe saves LR and FP on the stack before allocating
+// a new stack frame. This takes 8 bytes.
+#define HEXAGON_LRFP_SIZE 8
+
+#endif
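
Note: a small worked example of the HEXAGON_LRFP_SIZE constant above; allocframe reserves the requested locals plus the 8 bytes it pushes for LR and FP. The helper below is hypothetical and not part of the patch:

    #include "Hexagon.h"

    // Total stack consumed by an allocframe that needs Locals bytes of locals.
    static unsigned totalFrameBytes(unsigned Locals) {
      return Locals + HEXAGON_LRFP_SIZE;  // e.g. 24 + 8 = 32 bytes
    }
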
diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td
new file mode 100644
index 0000000..72939e6
--- /dev/null
+++ b/lib/Target/Hexagon/Hexagon.td
@@ -0,0 +1,66 @@
+//===- Hexagon.td - Describe the Hexagon Target Machine ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Hexagon Subtarget features.
+//
+
+
+// Hexagon architectures
+def ArchV2 : SubtargetFeature<"v2", "HexagonArchVersion", "V2",
+ "Hexagon v2">;
+def ArchV3 : SubtargetFeature<"v3", "HexagonArchVersion", "V3",
+ "Hexagon v3">;
+def ArchV4 : SubtargetFeature<"v4", "HexagonArchVersion", "V4",
+ "Hexagon v4">;
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+include "HexagonSchedule.td"
+include "HexagonRegisterInfo.td"
+include "HexagonCallingConv.td"
+include "HexagonInstrInfo.td"
+include "HexagonIntrinsics.td"
+include "HexagonIntrinsicsDerived.td"
+
+
+def HexagonInstrInfo : InstrInfo {
+ // Define how we want to layout our target-specific information field.
+}
+
+//===----------------------------------------------------------------------===//
+// Hexagon processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, ProcessorItineraries Itin,
+ list<SubtargetFeature> Features>
+ : Processor<Name, Itin, Features>;
+
+def : Proc<"hexagonv2", HexagonItineraries, [ArchV2]>;
+def : Proc<"hexagonv3", HexagonItineraries, [ArchV2, ArchV3]>;
+def : Proc<"hexagonv4", HexagonItinerariesV4, [ArchV2, ArchV3, ArchV4]>;
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def Hexagon : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = HexagonInstrInfo;
+}
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
new file mode 100644
index 0000000..8f8e804
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -0,0 +1,555 @@
+//===-- HexagonAsmPrinter.cpp - Print machine instrs to Hexagon assembly ----=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to Hexagon assembly language. This printer is
+// the output mechanism used by `llc'.
+//
+// Documentation at http://developer.apple.com/documentation/DeveloperTools/
+// Reference/Assembler/ASMIntroduction/chapter_1_section_1.html
+//
+//===----------------------------------------------------------------------===//
+
+
+#define DEBUG_TYPE "asm-printer"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool> AlignCalls(
+ "hexagon-align-calls", cl::Hidden, cl::init(true),
+ cl::desc("Insert falign after call instruction for Hexagon target"));
+
+
+namespace {
+ class HexagonAsmPrinter : public AsmPrinter {
+ const HexagonSubtarget *Subtarget;
+
+ public:
+ explicit HexagonAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {
+ Subtarget = &TM.getSubtarget<HexagonSubtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "Hexagon Assembly Printer";
+ }
+
+    /// printInstruction - This method is automatically generated by tablegen
+    /// from the instruction set description. It prints the given machine
+    /// instruction to the output stream.
+    void printInstruction(const MachineInstr *MI, raw_ostream &O);
+ virtual void EmitInstruction(const MachineInstr *MI);
+
+ void printOp(const MachineOperand &MO, raw_ostream &O);
+
+ /// printRegister - Print register according to target requirements.
+ ///
+ void printRegister(const MachineOperand &MO, bool R0AsZero,
+ raw_ostream &O) {
+ unsigned RegNo = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(RegNo) && "Not physreg??");
+ O << getRegisterName(RegNo);
+ }
+
+ void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &OS) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ if (MO.isReg()) {
+ printRegister(MO, false, OS);
+ } else if (MO.isImm()) {
+ OS << MO.getImm();
+ } else {
+ printOp(MO, OS);
+ }
+ }
+
+
+ bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const;
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS);
+
+
+ void printHexagonImmOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ int value = MI->getOperand(OpNo).getImm();
+ O << value;
+ }
+
+
+ void printHexagonNegImmOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ int value = MI->getOperand(OpNo).getImm();
+ O << -value;
+ }
+
+ void printHexagonMEMriOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO1 = MI->getOperand(OpNo);
+ const MachineOperand &MO2 = MI->getOperand(OpNo+1);
+
+ O << getRegisterName(MO1.getReg())
+ << " + #"
+ << (int) MO2.getImm();
+ }
+
+
+ void printHexagonFrameIndexOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO1 = MI->getOperand(OpNo);
+ const MachineOperand &MO2 = MI->getOperand(OpNo+1);
+
+ O << getRegisterName(MO1.getReg())
+ << ", #"
+ << MO2.getImm();
+ }
+
+ void printBranchOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ // Branches can take an immediate operand. This is used by the branch
+ // selection pass to print $+8, an eight byte displacement from the PC.
+ if (MI->getOperand(OpNo).isImm()) {
+ O << "$+" << MI->getOperand(OpNo).getImm()*4;
+ } else {
+ printOp(MI->getOperand(OpNo), O);
+ }
+ }
+
+ void printCallOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ }
+
+ void printAbsAddrOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ }
+
+
+ void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ O << "#HI(";
+ if (MI->getOperand(OpNo).isImm()) {
+ printHexagonImmOperand(MI, OpNo, O);
+ } else {
+ printOp(MI->getOperand(OpNo), O);
+ }
+ O << ")";
+ }
+
+ void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ O << "#HI(";
+ if (MI->getOperand(OpNo).isImm()) {
+ printHexagonImmOperand(MI, OpNo, O);
+ } else {
+ printOp(MI->getOperand(OpNo), O);
+ }
+ O << ")";
+ }
+
+ void printPredicateOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O);
+
+ void printAddrModeBasePlusOffset(const MachineInstr *MI, int OpNo,
+ raw_ostream &O);
+
+ void printGlobalOperand(const MachineInstr *MI, int OpNo, raw_ostream &O);
+ void printJumpTable(const MachineInstr *MI, int OpNo, raw_ostream &O);
+
+ void EmitAlignment(unsigned NumBits, const GlobalValue *GV = 0) const;
+
+ static const char *getRegisterName(unsigned RegNo);
+ };
+
+} // end of anonymous namespace
+
+// Include the auto-generated portion of the assembly writer.
+#include "HexagonGenAsmWriter.inc"
+
+
+void HexagonAsmPrinter::EmitAlignment(unsigned NumBits,
+ const GlobalValue *GV) const {
+
+ // For basic block level alignment, use falign.
+ if (!GV) {
+ OutStreamer.EmitRawText(StringRef("\t.falign"));
+ return;
+ }
+
+ AsmPrinter::EmitAlignment(NumBits, GV);
+}
+
+void HexagonAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Immediate:
+ dbgs() << "printOp() does not handle immediate values\n";
+ abort();
+ return;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_JumpTableIndex:
+ O << *GetJTISymbol(MO.getIndex());
+ // FIXME: PIC relocation model.
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << *GetCPISymbol(MO.getIndex());
+ return;
+ case MachineOperand::MO_ExternalSymbol:
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ // Computing the address of a global symbol, not calling it.
+ O << *Mang->getSymbol(MO.getGlobal());
+ printOffset(MO.getOffset(), O);
+ return;
+ }
+
+ default:
+ O << "<unknown operand type: " << MO.getType() << ">";
+ return;
+ }
+}
+
+
+//
+// isBlockOnlyReachableByFallthrough - We need to override this since the
+// default AsmPrinter does not print labels for any basic block that
+// is only reachable by a fall through. That works for all cases except
+// for the case in which the basic block is reachable by a fall through but
+// is also reachable via an indirect branch from a jump table. In this case,
+// the jump table
+// will contain a label not defined by AsmPrinter.
+//
+bool HexagonAsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+ if (MBB->hasAddressTaken()) {
+ return false;
+ }
+ return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB);
+}
+
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &OS) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'c': // Don't print "$" before a global var name or constant.
+ // Hexagon never has a prefix.
+ printOperand(MI, OpNo, OS);
+ return false;
+ case 'L': // Write second word of DImode reference.
+ // Verify that this operand has two consecutive registers.
+ if (!MI->getOperand(OpNo).isReg() ||
+ OpNo+1 == MI->getNumOperands() ||
+ !MI->getOperand(OpNo+1).isReg())
+ return true;
+ ++OpNo; // Return the high-part.
+ break;
+ case 'I':
+ // Write 'i' if an integer constant, otherwise nothing. Used to print
+ // addi vs add, etc.
+ if (MI->getOperand(OpNo).isImm())
+ OS << "i";
+ return false;
+ }
+ }
+
+ printOperand(MI, OpNo, OS);
+ return false;
+}
+
+bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ const MachineOperand &Base = MI->getOperand(OpNo);
+ const MachineOperand &Offset = MI->getOperand(OpNo+1);
+
+ if (Base.isReg())
+ printOperand(MI, OpNo, O);
+ else
+ assert(0 && "Unimplemented");
+
+ if (Offset.isImm()) {
+ if (Offset.getImm())
+ O << " + #" << Offset.getImm();
+ }
+ else
+ assert(0 && "Unimplemented");
+
+ return false;
+}
+
+void HexagonAsmPrinter::printPredicateOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ raw_ostream &O) {
+ assert(0 && "Unimplemented");
+}
+
+
+/// EmitInstruction -- Print out a single Hexagon MI to the current output
+/// stream.
+///
+void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream O(Str);
+
+ const MachineFunction* MF = MI->getParent()->getParent();
+ const HexagonMachineFunctionInfo* MFI =
+ (const HexagonMachineFunctionInfo*)
+ MF->getInfo<HexagonMachineFunctionInfo>();
+
+
+
+ // Print a brace for the beginning of the packet.
+ if (MFI->isStartPacket(MI)) {
+ O << "\t{" << '\n';
+ }
+
+ DEBUG( O << "// MI = " << *MI << '\n';);
+
+ // Indent
+ O << "\t";
+
+
+ if (MI->getOpcode() == Hexagon::ENDLOOP0) {
+ if (MFI->isEndPacket(MI) && MFI->isStartPacket(MI)) {
+ O << "\t{ nop }";
+ } else {
+ O << "}";
+ }
+ printInstruction(MI, O);
+ } else if (MI->getOpcode() == Hexagon::STriwt) {
+ //
+ // Handle truncated store on Hexagon.
+ //
+ O << "\tmemw(";
+ printHexagonMEMriOperand(MI, 0, O);
+
+ O << ") = ";
+ unsigned SubRegNum =
+ TM.getRegisterInfo()->getSubReg(MI->getOperand(2)
+ .getReg(), Hexagon::subreg_loreg);
+ const char *SubRegName = getRegisterName(SubRegNum);
+ O << SubRegName << '\n';
+ } else if (MI->getOpcode() == Hexagon::MPYI_rin) {
+    // Handle multiply with a negative constant on Hexagon:
+ // "$dst =- mpyi($src1, #$src2)"
+ printOperand(MI, 0, O);
+ O << " =- mpyi(";
+ printOperand(MI, 1, O);
+ O << ", #";
+ printHexagonNegImmOperand(MI, 2, O);
+ O << ")";
+ } else if (MI->getOpcode() == Hexagon::MEMw_ADDSUBi_indexed_MEM_V4) {
+ //
+ // Handle memw(Rs+u6:2) [+-]= #U5
+ //
+ O << "\tmemw("; printHexagonMEMriOperand(MI, 0, O); O << ") ";
+ int addend = MI->getOperand(2).getImm();
+ if (addend < 0)
+ O << "-= " << "#" << -addend << '\n';
+ else
+ O << "+= " << "#" << addend << '\n';
+ } else if (MI->getOpcode() == Hexagon::MEMw_ADDSUBi_MEM_V4) {
+ //
+ // Handle memw(Rs+u6:2) [+-]= #U5
+ //
+ O << "\tmemw("; printHexagonMEMriOperand(MI, 0, O); O << ") ";
+ int addend = MI->getOperand(2).getImm();
+ if (addend < 0)
+ O << "-= " << "#" << -addend << '\n';
+ else
+ O << "+= " << "#" << addend << '\n';
+ } else if (MI->getOpcode() == Hexagon::MEMh_ADDSUBi_indexed_MEM_V4) {
+ //
+ // Handle memh(Rs+u6:1) [+-]= #U5
+ //
+ O << "\tmemh("; printHexagonMEMriOperand(MI, 0, O); O << ") ";
+ int addend = MI->getOperand(2).getImm();
+ if (addend < 0)
+ O << "-= " << "#" << -addend << '\n';
+ else
+ O << "+= " << "#" << addend << '\n';
+ } else if (MI->getOpcode() == Hexagon::MEMh_ADDSUBi_MEM_V4) {
+ //
+ // Handle memh(Rs+u6:1) [+-]= #U5
+ //
+ O << "\tmemh("; printHexagonMEMriOperand(MI, 0, O); O << ") ";
+ int addend = MI->getOperand(2).getImm();
+ if (addend < 0)
+ O << "-= " << "#" << -addend << '\n';
+ else
+ O << "+= " << "#" << addend << '\n';
+ } else if (MI->getOpcode() == Hexagon::MEMb_ADDSUBi_indexed_MEM_V4) {
+ //
+ // Handle memb(Rs+u6:1) [+-]= #U5
+ //
+ O << "\tmemb("; printHexagonMEMriOperand(MI, 0, O); O << ") ";
+ int addend = MI->getOperand(2).getImm();
+ if (addend < 0)
+ O << "-= " << "#" << -addend << '\n';
+ else
+ O << "+= " << "#" << addend << '\n';
+ } else if (MI->getOpcode() == Hexagon::MEMb_ADDSUBi_MEM_V4) {
+ //
+ // Handle memb(Rs+u6:1) [+-]= #U5
+ //
+ O << "\tmemb("; printHexagonMEMriOperand(MI, 0, O); O << ") ";
+ int addend = MI->getOperand(2).getImm();
+ if (addend < 0)
+ O << "-= " << "#" << -addend << '\n';
+ else
+ O << "+= " << "#" << addend << '\n';
+ } else if (MI->getOpcode() == Hexagon::CMPbGTri_V4) {
+ //
+ // Handle Pd=cmpb.gt(Rs,#s8)
+ //
+ O << "\t";
+ printRegister(MI->getOperand(0), false, O);
+ O << " = cmpb.gt(";
+ printRegister(MI->getOperand(1), false, O);
+ O << ", ";
+ int val = MI->getOperand(2).getImm() >> 24;
+ O << "#" << val << ")" << '\n';
+ } else if (MI->getOpcode() == Hexagon::CMPhEQri_V4) {
+ //
+ // Handle Pd=cmph.eq(Rs,#8)
+ //
+ O << "\t";
+ printRegister(MI->getOperand(0), false, O);
+ O << " = cmph.eq(";
+ printRegister(MI->getOperand(1), false, O);
+ O << ", ";
+ int val = MI->getOperand(2).getImm();
+ assert((((0 <= val) && (val <= 127)) ||
+ ((65408 <= val) && (val <= 65535))) &&
+ "Not in correct range!");
+ if (val >= 65408) val -= 65536;
+ O << "#" << val << ")" << '\n';
+ } else if (MI->getOpcode() == Hexagon::CMPhGTri_V4) {
+ //
+ // Handle Pd=cmph.gt(Rs,#8)
+ //
+ O << "\t";
+ printRegister(MI->getOperand(0), false, O);
+ O << " = cmph.gt(";
+ printRegister(MI->getOperand(1), false, O);
+ O << ", ";
+ int val = MI->getOperand(2).getImm() >> 16;
+ O << "#" << val << ")" << '\n';
+ } else {
+ printInstruction(MI, O);
+ }
+
+ // Print a brace for the end of the packet.
+ if (MFI->isEndPacket(MI) && MI->getOpcode() != Hexagon::ENDLOOP0) {
+ O << "\n\t}" << '\n';
+ }
+
+ if (AlignCalls && MI->getDesc().isCall()) {
+ O << "\n\t.falign" << "\n";
+ }
+
+ OutStreamer.EmitRawText(O.str());
+ return;
+}
+
+/// PrintUnmangledNameSafely - Print out the printable characters in the name.
+/// Don't print things like \n or \0.
+// static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) {
+// for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen();
+// Name != E; ++Name)
+// if (isprint(*Name))
+// OS << *Name;
+// }
+
+
+void HexagonAsmPrinter::printAddrModeBasePlusOffset(const MachineInstr *MI,
+ int OpNo, raw_ostream &O) {
+ const MachineOperand &MO1 = MI->getOperand(OpNo);
+ const MachineOperand &MO2 = MI->getOperand(OpNo+1);
+
+ O << getRegisterName(MO1.getReg())
+ << " + #"
+ << MO2.getImm();
+}
+
+
+void HexagonAsmPrinter::printGlobalOperand(const MachineInstr *MI, int OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ assert( (MO.getType() == MachineOperand::MO_GlobalAddress) &&
+ "Expecting global address");
+
+ O << *Mang->getSymbol(MO.getGlobal());
+ if (MO.getOffset() != 0) {
+ O << " + ";
+ O << MO.getOffset();
+ }
+}
+
+void HexagonAsmPrinter::printJumpTable(const MachineInstr *MI, int OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ assert( (MO.getType() == MachineOperand::MO_JumpTableIndex) &&
+ "Expecting jump table index");
+
+ // Hexagon_TODO: Do we need name mangling?
+ O << *GetJTISymbol(MO.getIndex());
+}
+
+extern "C" void LLVMInitializeHexagonAsmPrinter() {
+ RegisterAsmPrinter<HexagonAsmPrinter> X(TheHexagonTarget);
+}
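
Note: the packet braces emitted by EmitInstruction above group instructions into a VLIW packet; the printed assembly looks roughly like the following (hypothetical output, shown only to illustrate the brace placement, not generated from this patch):

    {
      r1 = memw(r0 + #0)
      r2 = add(r2, #1)
    }
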
diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
new file mode 100644
index 0000000..38000e7
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -0,0 +1,240 @@
+//===---- HexagonCFGOptimizer.cpp - CFG optimizations ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#define DEBUG_TYPE "hexagon_cfg"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
+#include "HexagonMachineFunctionInfo.h"
+#include <iostream>
+
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+namespace {
+
+class HexagonCFGOptimizer : public MachineFunctionPass {
+
+private:
+ HexagonTargetMachine& QTM;
+ const HexagonSubtarget &QST;
+
+ void InvertAndChangeJumpTarget(MachineInstr*, MachineBasicBlock*);
+
+ public:
+ static char ID;
+ HexagonCFGOptimizer(HexagonTargetMachine& TM) : MachineFunctionPass(ID),
+ QTM(TM),
+ QST(*TM.getSubtargetImpl()) {}
+
+ const char *getPassName() const {
+ return "Hexagon CFG Optimizer";
+ }
+ bool runOnMachineFunction(MachineFunction &Fn);
+};
+
+
+char HexagonCFGOptimizer::ID = 0;
+
+static bool IsConditionalBranch(int Opc) {
+ return (Opc == Hexagon::JMP_Pred) || (Opc == Hexagon::JMP_PredNot)
+ || (Opc == Hexagon::JMP_PredPt) || (Opc == Hexagon::JMP_PredNotPt);
+}
+
+
+static bool IsUnconditionalJump(int Opc) {
+ return (Opc == Hexagon::JMP);
+}
+
+
+void
+HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI,
+ MachineBasicBlock* NewTarget) {
+ const HexagonInstrInfo *QII = QTM.getInstrInfo();
+ int NewOpcode = 0;
+ switch(MI->getOpcode()) {
+ case Hexagon::JMP_Pred:
+ NewOpcode = Hexagon::JMP_PredNot;
+ break;
+
+ case Hexagon::JMP_PredNot:
+ NewOpcode = Hexagon::JMP_Pred;
+ break;
+
+ case Hexagon::JMP_PredPt:
+ NewOpcode = Hexagon::JMP_PredNotPt;
+ break;
+
+ case Hexagon::JMP_PredNotPt:
+ NewOpcode = Hexagon::JMP_PredPt;
+ break;
+
+ default:
+ assert(0 && "Cannot handle this case");
+ }
+
+ MI->setDesc(QII->get(NewOpcode));
+ MI->getOperand(1).setMBB(NewTarget);
+}
+
+
+bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
+
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock* MBB = MBBb;
+
+ // Traverse the basic block.
+ MachineBasicBlock::iterator MII = MBB->getFirstTerminator();
+ if (MII != MBB->end()) {
+ MachineInstr *MI = MII;
+ int Opc = MI->getOpcode();
+ if (IsConditionalBranch(Opc)) {
+
+ //
+ // (Case 1) Transform the code if the following condition occurs:
+ // BB1: if (p0) jump BB3
+ // ...falls-through to BB2 ...
+ // BB2: jump BB4
+ // ...next block in layout is BB3...
+ // BB3: ...
+ //
+ // Transform this to:
+ // BB1: if (!p0) jump BB4
+ // Remove BB2
+ // BB3: ...
+ //
+ // (Case 2) A variation occurs when BB3 contains a JMP to BB4:
+ // BB1: if (p0) jump BB3
+ // ...falls-through to BB2 ...
+ // BB2: jump BB4
+ // ...other basic blocks ...
+ // BB4:
+ // ...not a fall-thru
+ // BB3: ...
+ // jump BB4
+ //
+ // Transform this to:
+ // BB1: if (!p0) jump BB4
+ // Remove BB2
+ // BB3: ...
+ // BB4: ...
+ //
+ unsigned NumSuccs = MBB->succ_size();
+ MachineBasicBlock::succ_iterator SI = MBB->succ_begin();
+ MachineBasicBlock* FirstSucc = *SI;
+ MachineBasicBlock* SecondSucc = *(++SI);
+ MachineBasicBlock* LayoutSucc = NULL;
+ MachineBasicBlock* JumpAroundTarget = NULL;
+
+ if (MBB->isLayoutSuccessor(FirstSucc)) {
+ LayoutSucc = FirstSucc;
+ JumpAroundTarget = SecondSucc;
+ } else if (MBB->isLayoutSuccessor(SecondSucc)) {
+ LayoutSucc = SecondSucc;
+ JumpAroundTarget = FirstSucc;
+ } else {
+ // Odd case...cannot handle.
+ }
+
+ // The target of the unconditional branch must be JumpAroundTarget.
+ // TODO: If not, we should not invert the unconditional branch.
+ MachineBasicBlock* CondBranchTarget = NULL;
+ if ((MI->getOpcode() == Hexagon::JMP_Pred) ||
+ (MI->getOpcode() == Hexagon::JMP_PredNot)) {
+ CondBranchTarget = MI->getOperand(1).getMBB();
+ }
+
+ if (!LayoutSucc || (CondBranchTarget != JumpAroundTarget)) {
+ continue;
+ }
+
+ if ((NumSuccs == 2) && LayoutSucc && (LayoutSucc->pred_size() == 1)) {
+
+ // Ensure that BB2 has one instruction -- an unconditional jump.
+ if ((LayoutSucc->size() == 1) &&
+ IsUnconditionalJump(LayoutSucc->front().getOpcode())) {
+ MachineBasicBlock* UncondTarget =
+ LayoutSucc->front().getOperand(0).getMBB();
+ // Check if the layout successor of BB2 is BB3.
+ bool case1 = LayoutSucc->isLayoutSuccessor(JumpAroundTarget);
+ bool case2 = JumpAroundTarget->isSuccessor(UncondTarget) &&
+ JumpAroundTarget->size() >= 1 &&
+ IsUnconditionalJump(JumpAroundTarget->back().getOpcode()) &&
+ JumpAroundTarget->pred_size() == 1 &&
+ JumpAroundTarget->succ_size() == 1;
+
+ if (case1 || case2) {
+ InvertAndChangeJumpTarget(MI, UncondTarget);
+ MBB->removeSuccessor(JumpAroundTarget);
+ MBB->addSuccessor(UncondTarget);
+
+ // Remove the unconditional branch in LayoutSucc.
+ LayoutSucc->erase(LayoutSucc->begin());
+ LayoutSucc->removeSuccessor(UncondTarget);
+ LayoutSucc->addSuccessor(JumpAroundTarget);
+
+ // This code performs the conversion for case 2, which moves
+ // the block to the fall-thru case (BB3 in the code above).
+ if (case2 && !case1) {
+ JumpAroundTarget->moveAfter(LayoutSucc);
+                // Only move a block if it doesn't have a fall-through;
+                // otherwise the CFG will be incorrect.
+ if (!UncondTarget->canFallThrough()) {
+ UncondTarget->moveAfter(JumpAroundTarget);
+ }
+ }
+
+ //
+              // Correct the live-in information, which is used by the
+              // post-RA scheduler. The live-ins of LayoutSucc are now
+              // exactly the values that are live-in to JumpAroundTarget.
+ //
+ std::vector<unsigned> OrigLiveIn(LayoutSucc->livein_begin(),
+ LayoutSucc->livein_end());
+ std::vector<unsigned> NewLiveIn(JumpAroundTarget->livein_begin(),
+ JumpAroundTarget->livein_end());
+ for (unsigned i = 0; i < OrigLiveIn.size(); ++i) {
+ LayoutSucc->removeLiveIn(OrigLiveIn[i]);
+ }
+ for (unsigned i = 0; i < NewLiveIn.size(); ++i) {
+ LayoutSucc->addLiveIn(NewLiveIn[i]);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ return true;
+}
+}
+
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonCFGOptimizer(HexagonTargetMachine &TM) {
+ return new HexagonCFGOptimizer(TM);
+}
diff --git a/lib/Target/Hexagon/HexagonCallingConv.td b/lib/Target/Hexagon/HexagonCallingConv.td
new file mode 100644
index 0000000..bd9608b
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonCallingConv.td
@@ -0,0 +1,35 @@
+//===- HexagonCallingConv.td - Calling Conventions Hexagon -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the Hexagon architectures.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// Hexagon 32-bit C return-value convention.
+def RetCC_Hexagon32 : CallingConv<[
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>,
+ CCIfType<[i64], CCAssignToReg<[D0, D1, D2]>>,
+
+ // Alternatively, they are assigned to the stack in 4-byte aligned units.
+ CCAssignToStack<4, 4>
+]>;
+
+// Hexagon 32-bit C Calling convention.
+def CC_Hexagon32 : CallingConv<[
+ // All arguments get passed in integer registers if there is space.
+ CCIfType<[i32, i16, i8], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>,
+ CCIfType<[i64], CCAssignToReg<[D0, D1, D2]>>,
+
+ // Alternatively, they are assigned to the stack in 4-byte aligned units.
+ CCAssignToStack<4, 4>
+]>;
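
Note: a hedged reading of CC_Hexagon32/RetCC_Hexagon32 above, not checked against the tablegen-generated allocator:

    // For a prototype such as
    //   long long f(long long a, int b, int c);
    // 'a' (i64) would be assigned to D0, whose halves alias R1:R0, so
    // 'b' and 'c' (i32) fall through to R2 and R3. Arguments that no longer
    // fit in R0-R5 / D0-D2 are passed on the stack in 4-byte aligned slots.
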
diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
new file mode 100644
index 0000000..2e51dbf
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
@@ -0,0 +1,207 @@
+//===-- HexagonCallingConvLower.cpp - Calling Convention lowering ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Hexagon_CCState class, used for lowering and
+// implementing calling conventions. It is adapted from the machine-independent
+// version of the class (CCState), but this one also handles calls to varargs
+// functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonCallingConvLower.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "Hexagon.h"
+using namespace llvm;
+
+Hexagon_CCState::Hexagon_CCState(CallingConv::ID CC, bool isVarArg,
+ const TargetMachine &tm,
+ SmallVector<CCValAssign, 16> &locs,
+ LLVMContext &c)
+ : CallingConv(CC), IsVarArg(isVarArg), TM(tm),
+ TRI(*TM.getRegisterInfo()), Locs(locs), Context(c) {
+ // No stack is used.
+ StackOffset = 0;
+
+ UsedRegs.resize((TRI.getNumRegs()+31)/32);
+}
+
+// HandleByVal - Allocate a stack slot large enough to pass an argument by
+// value. The size and alignment information of the argument is encoded in its
+// parameter attribute.
+void Hexagon_CCState::HandleByVal(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ int MinSize, int MinAlign,
+ ISD::ArgFlagsTy ArgFlags) {
+ unsigned Align = ArgFlags.getByValAlign();
+ unsigned Size = ArgFlags.getByValSize();
+ if (MinSize > (int)Size)
+ Size = MinSize;
+ if (MinAlign > (int)Align)
+ Align = MinAlign;
+ unsigned Offset = AllocateStack(Size, Align);
+
+ addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset,
+ LocVT.getSimpleVT(), LocInfo));
+}
+
+/// MarkAllocated - Mark a register and all of its aliases as allocated.
+void Hexagon_CCState::MarkAllocated(unsigned Reg) {
+ UsedRegs[Reg/32] |= 1 << (Reg&31);
+
+ if (const unsigned *RegAliases = TRI.getAliasSet(Reg))
+ for (; (Reg = *RegAliases); ++RegAliases)
+ UsedRegs[Reg/32] |= 1 << (Reg&31);
+}
+
+/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node,
+/// incorporating info about the formals into this state.
+void
+Hexagon_CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ Hexagon_CCAssignFn Fn,
+ unsigned SretValueInRegs) {
+ unsigned NumArgs = Ins.size();
+ unsigned i = 0;
+
+ // If the function returns a small struct in registers, skip
+ // over the first (dummy) argument.
+ if (SretValueInRegs != 0) {
+ ++i;
+ }
+
+
+ for (; i != NumArgs; ++i) {
+ EVT ArgVT = Ins[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, 0, 0, false)) {
+ dbgs() << "Formal argument #" << i << " has unhandled type "
+ << ArgVT.getEVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeReturn - Analyze the returned values of an ISD::RET node,
+/// incorporating info about the result values into this state.
+void
+Hexagon_CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ Hexagon_CCAssignFn Fn,
+ unsigned SretValueInRegs) {
+
+ // For Hexagon, Return small structures in registers.
+ if (SretValueInRegs != 0) {
+ if (SretValueInRegs <= 32) {
+ unsigned Reg = Hexagon::R0;
+ addLoc(CCValAssign::getReg(0, MVT::i32, Reg, MVT::i32,
+ CCValAssign::Full));
+ return;
+ }
+ if (SretValueInRegs <= 64) {
+ unsigned Reg = Hexagon::D0;
+ addLoc(CCValAssign::getReg(0, MVT::i64, Reg, MVT::i64,
+ CCValAssign::Full));
+ return;
+ }
+ }
+
+
+ // Determine which register each value should be copied into.
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ EVT VT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this, -1, -1, false)){
+ dbgs() << "Return operand #" << i << " has unhandled type "
+ << VT.getEVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+
+/// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info
+/// about the passed values into this state.
+void
+Hexagon_CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg>
+ &Outs,
+ Hexagon_CCAssignFn Fn,
+ int NonVarArgsParams,
+ unsigned SretValueSize) {
+ unsigned NumOps = Outs.size();
+
+ unsigned i = 0;
+ // If the called function returns a small struct in registers, skip
+ // the first actual parameter. We do not want to pass a pointer to
+ // the stack location.
+ if (SretValueSize != 0) {
+ ++i;
+ }
+
+ for (; i != NumOps; ++i) {
+ EVT ArgVT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this,
+ NonVarArgsParams, i+1, false)) {
+ dbgs() << "Call operand #" << i << " has unhandled type "
+ << ArgVT.getEVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeCallOperands - Same as above except it takes vectors of types
+/// and argument flags.
+void
+Hexagon_CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+ Hexagon_CCAssignFn Fn) {
+ unsigned NumOps = ArgVTs.size();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ EVT ArgVT = ArgVTs[i];
+ ISD::ArgFlagsTy ArgFlags = Flags[i];
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, -1, -1,
+ false)) {
+ dbgs() << "Call operand #" << i << " has unhandled type "
+ << ArgVT.getEVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeCallResult - Analyze the return values of an ISD::CALL node,
+/// incorporating info about the passed values into this state.
+void
+Hexagon_CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+ Hexagon_CCAssignFn Fn,
+ unsigned SretValueInRegs) {
+
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ EVT VT = Ins[i].VT;
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this, -1, -1, false)) {
+ dbgs() << "Call result #" << i << " has unhandled type "
+ << VT.getEVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeCallResult - Same as above except it's specialized for calls which
+/// produce a single value.
+void Hexagon_CCState::AnalyzeCallResult(EVT VT, Hexagon_CCAssignFn Fn) {
+ if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this, -1, -1,
+ false)) {
+ dbgs() << "Call result has unhandled type "
+ << VT.getEVTString() << "\n";
+ abort();
+ }
+}
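
Note: a quick worked example of the UsedRegs indexing used by MarkAllocated/isAllocated above (values and helper are illustrative only):

    #include <cassert>

    // For Reg = 35: 35/32 == 1 and 35&31 == 3, so MarkAllocated sets bit 3
    // of UsedRegs[1], and isAllocated(35) tests exactly that bit.
    static void usedRegsIndexingExample() {
      unsigned Reg = 35;
      unsigned Word = Reg / 32;          // 1
      unsigned Mask = 1u << (Reg & 31);  // 1 << 3 == 8
      assert(Word == 1 && Mask == 8u);
    }
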
diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.h b/lib/Target/Hexagon/HexagonCallingConvLower.h
new file mode 100644
index 0000000..1f601e8
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonCallingConvLower.h
@@ -0,0 +1,189 @@
+//===-- HexagonCallingConvLower.h - Calling Conventions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Hexagon_CCState class, used for lowering
+// and implementing calling conventions. It is adapted from the
+// target-independent version, but this one also handles calls to varargs
+// functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_Hexagon_CODEGEN_CALLINGCONVLOWER_H
+#define LLVM_Hexagon_CODEGEN_CALLINGCONVLOWER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+
+//
+// Need to handle varargs.
+//
+namespace llvm {
+ class TargetRegisterInfo;
+ class TargetMachine;
+ class Hexagon_CCState;
+ class SDNode;
+
+
+/// Hexagon_CCAssignFn - This function assigns a location for Val, updating
+/// State to reflect the change.
+typedef bool Hexagon_CCAssignFn(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, Hexagon_CCState &State,
+ int NonVarArgsParams,
+ int CurrentParam,
+ bool ForceMem);
+
+
+/// CCState - This class holds information needed while lowering arguments and
+/// return values. It captures which registers are already assigned and which
+/// stack slots are used. It provides accessors to allocate these values.
+class Hexagon_CCState {
+ CallingConv::ID CallingConv;
+ bool IsVarArg;
+ const TargetMachine &TM;
+ const TargetRegisterInfo &TRI;
+ SmallVector<CCValAssign, 16> &Locs;
+ LLVMContext &Context;
+
+ unsigned StackOffset;
+ SmallVector<uint32_t, 16> UsedRegs;
+public:
+ Hexagon_CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &TM,
+ SmallVector<CCValAssign, 16> &locs, LLVMContext &c);
+
+ void addLoc(const CCValAssign &V) {
+ Locs.push_back(V);
+ }
+
+ LLVMContext &getContext() const { return Context; }
+ const TargetMachine &getTarget() const { return TM; }
+ unsigned getCallingConv() const { return CallingConv; }
+ bool isVarArg() const { return IsVarArg; }
+
+ unsigned getNextStackOffset() const { return StackOffset; }
+
+ /// isAllocated - Return true if the specified register (or an alias) is
+ /// allocated.
+ bool isAllocated(unsigned Reg) const {
+ return UsedRegs[Reg/32] & (1 << (Reg&31));
+ }
+
+ /// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node,
+ /// incorporating info about the formals into this state.
+ void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+ Hexagon_CCAssignFn Fn, unsigned SretValueInRegs);
+
+ /// AnalyzeReturn - Analyze the returned values of an ISD::RET node,
+ /// incorporating info about the result values into this state.
+ void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ Hexagon_CCAssignFn Fn, unsigned SretValueInRegs);
+
+ /// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info
+ /// about the passed values into this state.
+ void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ Hexagon_CCAssignFn Fn, int NonVarArgsParams,
+ unsigned SretValueSize);
+
+ /// AnalyzeCallOperands - Same as above except it takes vectors of types
+ /// and argument flags.
+ void AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+ Hexagon_CCAssignFn Fn);
+
+ /// AnalyzeCallResult - Analyze the return values of an ISD::CALL node,
+ /// incorporating info about the passed values into this state.
+ void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+ Hexagon_CCAssignFn Fn, unsigned SretValueInRegs);
+
+ /// AnalyzeCallResult - Same as above except it's specialized for calls which
+ /// produce a single value.
+ void AnalyzeCallResult(EVT VT, Hexagon_CCAssignFn Fn);
+
+ /// getFirstUnallocated - Return the first unallocated register in the set, or
+ /// NumRegs if they are all allocated.
+ unsigned getFirstUnallocated(const unsigned *Regs, unsigned NumRegs) const {
+ for (unsigned i = 0; i != NumRegs; ++i)
+ if (!isAllocated(Regs[i]))
+ return i;
+ return NumRegs;
+ }
+
+ /// AllocateReg - Attempt to allocate one register. If it is not available,
+ /// return zero. Otherwise, return the register, marking it and any aliases
+ /// as allocated.
+ unsigned AllocateReg(unsigned Reg) {
+ if (isAllocated(Reg)) return 0;
+ MarkAllocated(Reg);
+ return Reg;
+ }
+
+ /// Version of AllocateReg with extra register to be shadowed.
+ unsigned AllocateReg(unsigned Reg, unsigned ShadowReg) {
+ if (isAllocated(Reg)) return 0;
+ MarkAllocated(Reg);
+ MarkAllocated(ShadowReg);
+ return Reg;
+ }
+
+ /// AllocateReg - Attempt to allocate one of the specified registers. If none
+ /// are available, return zero. Otherwise, return the first one available,
+ /// marking it and any aliases as allocated.
+ unsigned AllocateReg(const unsigned *Regs, unsigned NumRegs) {
+ unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs);
+ if (FirstUnalloc == NumRegs)
+ return 0; // Didn't find the reg.
+
+ // Mark the register and any aliases as allocated.
+ unsigned Reg = Regs[FirstUnalloc];
+ MarkAllocated(Reg);
+ return Reg;
+ }
+
+ /// Version of AllocateReg with list of registers to be shadowed.
+ unsigned AllocateReg(const unsigned *Regs, const unsigned *ShadowRegs,
+ unsigned NumRegs) {
+ unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs);
+ if (FirstUnalloc == NumRegs)
+ return 0; // Didn't find the reg.
+
+ // Mark the register and any aliases as allocated.
+ unsigned Reg = Regs[FirstUnalloc], ShadowReg = ShadowRegs[FirstUnalloc];
+ MarkAllocated(Reg);
+ MarkAllocated(ShadowReg);
+ return Reg;
+ }
+
+ /// AllocateStack - Allocate a chunk of stack space with the specified size
+ /// and alignment.
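+ /// For example, with StackOffset = 20, Size = 8 and Align = 8, the offset is
+ /// first rounded up to 24, 24 is returned, and StackOffset becomes 32.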
+ unsigned AllocateStack(unsigned Size, unsigned Align) {
+ assert(Align && ((Align-1) & Align) == 0); // Align is power of 2.
+ StackOffset = ((StackOffset + Align-1) & ~(Align-1));
+ unsigned Result = StackOffset;
+ StackOffset += Size;
+ return Result;
+ }
+
+ // HandleByVal - Allocate a stack slot large enough to pass an argument by
+ // value. The size and alignment information of the argument is encoded in its
+ // parameter attribute.
+ void HandleByVal(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags);
+
+private:
+ /// MarkAllocated - Mark a register and all of its aliases as allocated.
+ void MarkAllocated(unsigned Reg);
+};
+
+
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
new file mode 100644
index 0000000..cb73ae0
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -0,0 +1,184 @@
+//===--- HexagonExpandPredSpillCode.cpp - Expand Predicate Spill Code ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The Hexagon processor has no instructions that load or store predicate
+// registers directly. So, when these registers must be spilled a general
+// purpose register must be found and the value copied to/from it from/to
+// the predicate register. This code currently does not use the register
+// scavenger mechanism available in the allocator. There are two registers
+// reserved to allow spilling/restoring predicate registers. One is used to
+// hold the predicate value. The other is used when stack frame offsets are
+// too large.
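+//
+// Schematically (the operand syntax here is only illustrative), a spill
+//   STriw_pred [r30], #ofst, p0
+// is expanded into
+//   reserved_gpr = TFR_RsPd p0       ; copy the predicate into a GPR
+//   STriw [r30], #ofst, reserved_gpr ; store the GPR to the slot
+// and a reload (LDriw_pred) uses LDriw followed by TFR_PdRs.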
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
+#include "HexagonMachineFunctionInfo.h"
+#include <map>
+#include <iostream>
+
+#include "llvm/Support/CommandLine.h"
+
+
+using namespace llvm;
+
+
+namespace {
+
+class HexagonExpandPredSpillCode : public MachineFunctionPass {
+ HexagonTargetMachine& QTM;
+ const HexagonSubtarget &QST;
+
+ public:
+ static char ID;
+ HexagonExpandPredSpillCode(HexagonTargetMachine& TM) :
+ MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {}
+
+ const char *getPassName() const {
+ return "Hexagon Expand Predicate Spill Code";
+ }
+ bool runOnMachineFunction(MachineFunction &Fn);
+};
+
+
+char HexagonExpandPredSpillCode::ID = 0;
+
+
+bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
+
+ const HexagonInstrInfo *TII = QTM.getInstrInfo();
+ const HexagonRegisterInfo *RegInfo = QTM.getRegisterInfo();
+
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock* MBB = MBBb;
+ // Traverse the basic block.
+ for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
+ ++MII) {
+ MachineInstr *MI = MII;
+ int Opc = MI->getOpcode();
+ if (Opc == Hexagon::STriw_pred) {
+ // STriw_pred [R30], ofst, SrcReg;
+ unsigned FP = MI->getOperand(0).getReg();
+ assert(FP == RegInfo->getFrameRegister() &&
+ "Not a Frame Pointer, Nor a Spill Slot");
+ assert(MI->getOperand(1).isImm() && "Not an offset");
+ int Offset = MI->getOperand(1).getImm();
+ int SrcReg = MI->getOperand(2).getReg();
+ assert(Hexagon::PredRegsRegClass.contains(SrcReg) &&
+ "Not a predicate register");
+ if (!TII->isValidOffset(Hexagon::STriw, Offset)) {
+ if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::CONST32_Int_Real),
+ HEXAGON_RESERVED_REG_1).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_rr),
+ HEXAGON_RESERVED_REG_1)
+ .addReg(FP).addReg(HEXAGON_RESERVED_REG_1);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd),
+ HEXAGON_RESERVED_REG_2).addReg(SrcReg);
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::STriw))
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0).addReg(HEXAGON_RESERVED_REG_2);
+ } else {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri),
+ HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd),
+ HEXAGON_RESERVED_REG_2).addReg(SrcReg);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::STriw))
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0)
+ .addReg(HEXAGON_RESERVED_REG_2);
+ }
+ } else {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd),
+ HEXAGON_RESERVED_REG_2).addReg(SrcReg);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::STriw)).
+ addReg(FP).addImm(Offset).addReg(HEXAGON_RESERVED_REG_2);
+ }
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::LDriw_pred) {
+ // DstReg = LDriw_pred [R30], ofst.
+ int DstReg = MI->getOperand(0).getReg();
+ assert(Hexagon::PredRegsRegClass.contains(DstReg) &&
+ "Not a predicate register");
+ unsigned FP = MI->getOperand(1).getReg();
+ assert(FP == RegInfo->getFrameRegister() &&
+ "Not a Frame Pointer, Nor a Spill Slot");
+ assert(MI->getOperand(2).isImm() && "Not an offset");
+ int Offset = MI->getOperand(2).getImm();
+ if (!TII->isValidOffset(Hexagon::LDriw, Offset)) {
+ if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::CONST32_Int_Real),
+ HEXAGON_RESERVED_REG_1).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_rr),
+ HEXAGON_RESERVED_REG_1)
+ .addReg(FP)
+ .addReg(HEXAGON_RESERVED_REG_1);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw),
+ HEXAGON_RESERVED_REG_2)
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs),
+ DstReg).addReg(HEXAGON_RESERVED_REG_2);
+ } else {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri),
+ HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw),
+ HEXAGON_RESERVED_REG_2)
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs),
+ DstReg).addReg(HEXAGON_RESERVED_REG_2);
+ }
+ } else {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw),
+ HEXAGON_RESERVED_REG_2).addReg(FP).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs),
+ DstReg).addReg(HEXAGON_RESERVED_REG_2);
+ }
+ MII = MBB->erase(MI);
+ --MII;
+ }
+ }
+ }
+
+ return true;
+}
+
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonExpandPredSpillCode(HexagonTargetMachine &TM) {
+ return new HexagonExpandPredSpillCode(TM);
+}
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
new file mode 100644
index 0000000..78e0b1c
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -0,0 +1,333 @@
+//==-- HexagonFrameLowering.cpp - Define frame lowering --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//
+//===----------------------------------------------------------------------===//
+#include "Hexagon.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonFrameLowering.h"
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <iostream>
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Function.h"
+using namespace llvm;
+
+static cl::opt<bool> DisableDeallocRet(
+ "disable-hexagon-dealloc-ret",
+ cl::Hidden,
+ cl::desc("Disable Dealloc Return for Hexagon target"));
+
+/// determineFrameLayout - Determine the size of the frame and maximum call
+/// frame size.
+void HexagonFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get the alignments provided by the target.
+ unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ // Get the maximum call frame size of all the calls.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+
+ // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
+ // that allocations will be aligned.
+ if (MFI->hasVarSizedObjects())
+ maxCallFrameSize = RoundUpToAlignment(maxCallFrameSize, TargetAlign);
+
+ // Update maximum call frame size.
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
+
+ // Include call frame size in total.
+ FrameSize += maxCallFrameSize;
+
+ // Make sure the frame is aligned.
+ FrameSize = RoundUpToAlignment(FrameSize, TargetAlign);
+
+ // Update frame info.
+ MFI->setStackSize(FrameSize);
+}
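+
+// As a worked example of the computation above (numbers are illustrative):
+// with an initial stack size of 20 bytes, a maximum call frame of 24 bytes
+// and an 8-byte stack alignment, the final size is
+// RoundUpToAlignment(20 + 24, 8) = 48 bytes.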
+
+
+void HexagonFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ const HexagonRegisterInfo *QRI =
+ static_cast<const HexagonRegisterInfo *>(MF.getTarget().getRegisterInfo());
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ determineFrameLayout(MF);
+
+ // Check if frame moves are needed for EH.
+ bool needsFrameMoves = MMI.hasDebugInfo() ||
+ MF.getFunction()->needsUnwindTableEntry();
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ int NumBytes = (int) MFI->getStackSize();
+
+ // LLVM expects allocframe not to be the first instruction in the
+ // basic block.
+ MachineBasicBlock::iterator InsertPt = MBB.begin();
+
+ //
+ // ALLOCA adjust regs. Iterate over ADJDYNALLOC nodes and change the offset.
+ //
+ HexagonMachineFunctionInfo *FuncInfo =
+ MF.getInfo<HexagonMachineFunctionInfo>();
+ const std::vector<MachineInstr*>& AdjustRegs =
+ FuncInfo->getAllocaAdjustInsts();
+ for (std::vector<MachineInstr*>::const_iterator i = AdjustRegs.begin(),
+ e = AdjustRegs.end();
+ i != e; ++i) {
+ MachineInstr* MI = *i;
+ assert((MI->getOpcode() == Hexagon::ADJDYNALLOC) &&
+ "Expected adjust alloca node");
+
+ MachineOperand& MO = MI->getOperand(2);
+ assert(MO.isImm() && "Expected immediate");
+ MO.setImm(MFI->getMaxCallFrameSize());
+ }
+
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+
+ if (needsFrameMoves) {
+ // Advance CFA. DW_CFA_def_cfa
+ unsigned FPReg = QRI->getFrameRegister();
+ unsigned RAReg = QRI->getRARegister();
+
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(FPReg, -8);
+ Moves.push_back(MachineMove(0, Dst, Src));
+
+ // R31 = (R31 - #4)
+ MachineLocation LRDst(RAReg, -4);
+ MachineLocation LRSrc(RAReg);
+ Moves.push_back(MachineMove(0, LRDst, LRSrc));
+
+ // R30 = (R30 - #8)
+ MachineLocation SPDst(FPReg, -8);
+ MachineLocation SPSrc(FPReg);
+ Moves.push_back(MachineMove(0, SPDst, SPSrc));
+ }
+
+ //
+ // Only insert ALLOCFRAME if we need to.
+ //
+ if (hasFP(MF)) {
+ // Check for overflow.
+ // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
+ const int ALLOCFRAME_MAX = 16384;
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ if (NumBytes >= ALLOCFRAME_MAX) {
+ // Emit allocframe(#0).
+ BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::ALLOCFRAME)).addImm(0);
+
+ // Subtract offset from frame pointer.
+ BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::CONST32_Int_Real),
+ HEXAGON_RESERVED_REG_1).addImm(NumBytes);
+ BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::SUB_rr),
+ QRI->getStackRegister()).
+ addReg(QRI->getStackRegister()).
+ addReg(HEXAGON_RESERVED_REG_1);
+ } else {
+ BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::ALLOCFRAME)).addImm(NumBytes);
+ }
+ }
+}
+// Returns true if MBB has a machine instruction that indicates a tail call
+// in the block.
+bool HexagonFrameLowering::hasTailCall(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ unsigned RetOpcode = MBBI->getOpcode();
+
+ return RetOpcode == Hexagon::TCRETURNtg || RetOpcode == Hexagon::TCRETURNtext;
+}
+
+void HexagonFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ DebugLoc dl = MBBI->getDebugLoc();
+ //
+ // Only insert deallocframe if we need to.
+ //
+ if (hasFP(MF)) {
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ MachineBasicBlock::iterator MBBI_end = MBB.end();
+ //
+ // For Hexagon, we don't need the frame size.
+ //
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ int NumBytes = (int) MFI->getStackSize();
+
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ // Replace 'jumpr r31' instruction with dealloc_return for V4 and higher
+ // versions.
+ if (STI.hasV4TOps() && MBBI->getOpcode() == Hexagon::JMPR
+ && !DisableDeallocRet) {
+ // Remove jumpr node.
+ MBB.erase(MBBI);
+ // Add dealloc_return.
+ BuildMI(MBB, MBBI_end, dl, TII.get(Hexagon::DEALLOC_RET_V4))
+ .addImm(NumBytes);
+ } else { // Add deallocframe for V2 and V3.
+ BuildMI(MBB, MBBI, dl, TII.get(Hexagon::DEALLOCFRAME)).addImm(NumBytes);
+ }
+ }
+}
+
+bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const HexagonMachineFunctionInfo *FuncInfo =
+ MF.getInfo<HexagonMachineFunctionInfo>();
+ return (MFI->hasCalls() || (MFI->getStackSize() > 0) ||
+ FuncInfo->hasClobberLR() );
+}
+
+bool
+HexagonFrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction *MF = MBB.getParent();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+ if (CSI.empty()) {
+ return false;
+ }
+
+ // We can only use double-word stores if we spill contiguous callee-saved
+ // registers. For instance, we cannot schedule double-word stores if we
+ // spill r24, r26, and r27.
+ // Hexagon_TODO: We can try to double-word align odd registers for -O2 and
+ // above.
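+ // Illustrative pairing: r26 and r27 form one super-register and can be
+ // written with a single double-word store, whereas r24 followed by r26
+ // breaks the run and falls back to single-word stores.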
+ bool ContiguousRegs = true;
+
+ for (unsigned i = 0; i < CSI.size(); ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ //
+ // Check if we can use a double-word store.
+ //
+ const unsigned* SuperReg = TRI->getSuperRegisters(Reg);
+
+ // Assume that there is exactly one superreg.
+ assert(SuperReg[0] && !SuperReg[1] && "Expected exactly one superreg");
+ bool CanUseDblStore = false;
+ const TargetRegisterClass* SuperRegClass = 0;
+
+ if (ContiguousRegs && (i < CSI.size()-1)) {
+ const unsigned* SuperRegNext = TRI->getSuperRegisters(CSI[i+1].getReg());
+ assert(SuperRegNext[0] && !SuperRegNext[1] &&
+ "Expected exactly one superreg");
+ SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg[0]);
+ CanUseDblStore = (SuperRegNext[0] == SuperReg[0]);
+ }
+
+
+ if (CanUseDblStore) {
+ TII.storeRegToStackSlot(MBB, MI, SuperReg[0], true,
+ CSI[i+1].getFrameIdx(), SuperRegClass, TRI);
+ MBB.addLiveIn(SuperReg[0]);
+ ++i;
+ } else {
+ // Cannot use a double-word store.
+ ContiguousRegs = false;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RC,
+ TRI);
+ MBB.addLiveIn(Reg);
+ }
+ }
+ return true;
+}
+
+
+bool HexagonFrameLowering::restoreCalleeSavedRegisters(
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+
+ MachineFunction *MF = MBB.getParent();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+ if (CSI.empty()) {
+ return false;
+ }
+
+ // We can only use double-word loads if we spill contiguous callee-saved
+ // registers. For instance, we cannot schedule double-word loads if we
+ // spill r24, r26, and r27.
+ // Hexagon_TODO: We can try to double-word align odd registers for -O2 and
+ // above.
+ bool ContiguousRegs = true;
+
+ for (unsigned i = 0; i < CSI.size(); ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ //
+ // Check if we can use a double-word load.
+ //
+ const unsigned* SuperReg = TRI->getSuperRegisters(Reg);
+ const TargetRegisterClass* SuperRegClass = 0;
+
+ // Assume that there is exactly one superreg.
+ assert(SuperReg[0] && !SuperReg[1] && "Expected exactly one superreg");
+ bool CanUseDblLoad = false;
+ if (ContiguousRegs && (i < CSI.size()-1)) {
+ const unsigned* SuperRegNext = TRI->getSuperRegisters(CSI[i+1].getReg());
+ assert(SuperRegNext[0] && !SuperRegNext[1] &&
+ "Expected exactly one superreg");
+ SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg[0]);
+ CanUseDblLoad = (SuperRegNext[0] == SuperReg[0]);
+ }
+
+
+ if (CanUseDblLoad) {
+ TII.loadRegFromStackSlot(MBB, MI, SuperReg[0], CSI[i+1].getFrameIdx(),
+ SuperRegClass, TRI);
+ MBB.addLiveIn(SuperReg[0]);
+ ++i;
+ } else {
+ // Cannot use a double-word load.
+ ContiguousRegs = false;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
+ MBB.addLiveIn(Reg);
+ }
+ }
+ return true;
+}
+
+int HexagonFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ return MF.getFrameInfo()->getObjectOffset(FI);
+}
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h
new file mode 100644
index 0000000..ad87f11
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -0,0 +1,50 @@
+//=- HexagonFrameLowering.h - Define frame lowering for Hexagon --*- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGON_FRAMEINFO_H
+#define HEXAGON_FRAMEINFO_H
+
+#include "Hexagon.h"
+#include "HexagonSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+class HexagonFrameLowering : public TargetFrameLowering {
+private:
+ const HexagonSubtarget &STI;
+ void determineFrameLayout(MachineFunction &MF) const;
+
+public:
+ explicit HexagonFrameLowering(const HexagonSubtarget &sti)
+ : TargetFrameLowering(StackGrowsDown, 8, 0), STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ virtual bool
+ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ virtual bool
+ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasTailCall(MachineBasicBlock &MBB) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
new file mode 100644
index 0000000..c1abc4a
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -0,0 +1,644 @@
+//===-- HexagonHardwareLoops.cpp - Identify and generate hardware loops ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass identifies loops where we can generate the Hexagon hardware
+// loop instruction. The hardware loop can perform loop branches with a
+// zero-cycle overhead.
+//
+// The pattern that defines the induction variable can change depending on
+// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
+// normalizes induction variables, and the Loop Strength Reduction pass
+// run by 'llc' may also make changes to the induction variable.
+// The pattern detected by this phase is due to running Strength Reduction.
+//
+// Criteria for hardware loops:
+// - Countable loops (w/ ind. var for a trip count)
+// - Assumes loops are normalized by IndVarSimplify
+// - Try inner-most loops first
+// - No nested hardware loops.
+// - No function calls in loops.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hwloops"
+#include "llvm/Constants.h"
+#include "llvm/PassSupport.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <algorithm>
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+
+using namespace llvm;
+
+STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
+
+namespace {
+ class CountValue;
+ struct HexagonHardwareLoops : public MachineFunctionPass {
+ MachineLoopInfo *MLI;
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+
+ HexagonHardwareLoops() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "Hexagon Hardware Loops"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// getCanonicalInductionVariable - Check to see if the loop has a canonical
+ /// induction variable.
+ /// Should be defined in MachineLoop. Based upon version in class Loop.
+ const MachineInstr *getCanonicalInductionVariable(MachineLoop *L) const;
+
+ /// getTripCount - Return a loop-invariant LLVM register indicating the
+ /// number of times the loop will be executed. If the trip-count cannot
+ /// be determined, this returns null.
+ CountValue *getTripCount(MachineLoop *L) const;
+
+ /// isInductionOperation - Return true if the instruction matches the
+ /// pattern for an operation that defines an induction variable.
+ bool isInductionOperation(const MachineInstr *MI, unsigned IVReg) const;
+
+ /// isInvalidLoopOperation - Return true if the instruction is not valid
+ /// within a hardware loop.
+ bool isInvalidLoopOperation(const MachineInstr *MI) const;
+
+ /// containsInvalidInstruction - Return true if the loop contains an
+ /// instruction that inhibits using the hardware loop.
+ bool containsInvalidInstruction(MachineLoop *L) const;
+
+ /// convertToHardwareLoop - Given a loop, check if we can convert it to a
+ /// hardware loop. If so, then perform the conversion and return true.
+ bool convertToHardwareLoop(MachineLoop *L);
+
+ };
+
+ char HexagonHardwareLoops::ID = 0;
+
+
+ // CountValue class - Abstraction for a trip count of a loop. A
+ // smaller version of the MachineOperand class without the concerns
+ // of changing the operand representation.
+ class CountValue {
+ public:
+ enum CountValueType {
+ CV_Register,
+ CV_Immediate
+ };
+ private:
+ CountValueType Kind;
+ union Values {
+ unsigned RegNum;
+ int64_t ImmVal;
+ Values(unsigned r) : RegNum(r) {}
+ Values(int64_t i) : ImmVal(i) {}
+ } Contents;
+ bool isNegative;
+
+ public:
+ CountValue(unsigned r, bool neg) : Kind(CV_Register), Contents(r),
+ isNegative(neg) {}
+ explicit CountValue(int64_t i) : Kind(CV_Immediate), Contents(i),
+ isNegative(i < 0) {}
+ CountValueType getType() const { return Kind; }
+ bool isReg() const { return Kind == CV_Register; }
+ bool isImm() const { return Kind == CV_Immediate; }
+ bool isNeg() const { return isNegative; }
+
+ unsigned getReg() const {
+ assert(isReg() && "Wrong CountValue accessor");
+ return Contents.RegNum;
+ }
+ void setReg(unsigned Val) {
+ Contents.RegNum = Val;
+ }
+ int64_t getImm() const {
+ assert(isImm() && "Wrong CountValue accessor");
+ if (isNegative) {
+ return -Contents.ImmVal;
+ }
+ return Contents.ImmVal;
+ }
+ void setImm(int64_t Val) {
+ Contents.ImmVal = Val;
+ }
+
+ void print(raw_ostream &OS, const TargetMachine *TM = 0) const {
+ if (isReg()) { OS << PrintReg(getReg()); }
+ if (isImm()) { OS << getImm(); }
+ }
+ };
+
+ struct HexagonFixupHwLoops : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+
+ HexagonFixupHwLoops() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "Hexagon Hardware Loop Fixup"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// Maximum distance between the loop instr and the basic block.
+ /// Just an estimate.
+ static const unsigned MAX_LOOP_DISTANCE = 200;
+
+ /// fixupLoopInstrs - Check the offset between each loop instruction and
+ /// the loop basic block to determine if we can use the LOOP instruction
+ /// or if we need to set the LC/SA registers explicitly.
+ bool fixupLoopInstrs(MachineFunction &MF);
+
+ /// convertLoopInstr - Add the instruction to set the LC and SA registers
+ /// explicitly.
+ void convertLoopInstr(MachineFunction &MF,
+ MachineBasicBlock::iterator &MII,
+ RegScavenger &RS);
+
+ };
+
+ char HexagonFixupHwLoops::ID = 0;
+
+} // end anonymous namespace
+
+
+/// isHardwareLoop - Returns true if the instruction is a hardware loop
+/// instruction.
+static bool isHardwareLoop(const MachineInstr *MI) {
+ return MI->getOpcode() == Hexagon::LOOP0_r ||
+ MI->getOpcode() == Hexagon::LOOP0_i;
+}
+
+/// isCompareEqualsImm - Returns true if the instruction is a compare-equals
+/// instruction with an immediate operand.
+static bool isCompareEqualsImm(const MachineInstr *MI) {
+ return MI->getOpcode() == Hexagon::CMPEQri;
+}
+
+
+/// createHexagonHardwareLoops - Factory for creating
+/// the hardware loop phase.
+FunctionPass *llvm::createHexagonHardwareLoops() {
+ return new HexagonHardwareLoops();
+}
+
+
+bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
+
+ bool Changed = false;
+
+ // Get the loop information.
+ MLI = &getAnalysis<MachineLoopInfo>();
+ // Get the register information.
+ MRI = &MF.getRegInfo();
+ // Get the target-specific instruction info.
+ TII = MF.getTarget().getInstrInfo();
+
+ for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
+ I != E; ++I) {
+ MachineLoop *L = *I;
+ if (!L->getParentLoop()) {
+ Changed |= convertToHardwareLoop(L);
+ }
+ }
+
+ return Changed;
+}
+
+/// getCanonicalInductionVariable - Check to see if the loop has a canonical
+/// induction variable. We check for a simple recurrence pattern - an
+/// integer recurrence that decrements by one each time through the loop and
+/// ends at zero. If so, return the phi node that corresponds to it.
+///
+/// Based upon the similar code in LoopInfo except this code is specific to
+/// the machine.
+/// This method assumes that the IndVarSimplify pass has been run by 'opt'.
+///
+const MachineInstr
+*HexagonHardwareLoops::getCanonicalInductionVariable(MachineLoop *L) const {
+ MachineBasicBlock *TopMBB = L->getTopBlock();
+ MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
+ assert(PI != TopMBB->pred_end() &&
+ "Loop must have more than one incoming edge!");
+ MachineBasicBlock *Backedge = *PI++;
+ if (PI == TopMBB->pred_end()) return 0; // dead loop
+ MachineBasicBlock *Incoming = *PI++;
+ if (PI != TopMBB->pred_end()) return 0; // multiple backedges?
+
+ // Make sure there is one incoming and one backedge and determine which
+ // is which.
+ if (L->contains(Incoming)) {
+ if (L->contains(Backedge))
+ return 0;
+ std::swap(Incoming, Backedge);
+ } else if (!L->contains(Backedge))
+ return 0;
+
+ // Loop over all of the PHI nodes, looking for a canonical induction variable:
+ // - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2".
+ // - The recurrence comes from the backedge.
+ // - The definition is an induction operation.
+ for (MachineBasicBlock::iterator I = TopMBB->begin(), E = TopMBB->end();
+ I != E && I->isPHI(); ++I) {
+ const MachineInstr *MPhi = &*I;
+ unsigned DefReg = MPhi->getOperand(0).getReg();
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
+ // Check each operand for the value from the backedge.
+ MachineBasicBlock *MBB = MPhi->getOperand(i+1).getMBB();
+ if (L->contains(MBB)) { // operand comes from the backedge
+ // Check if the definition is an induction operation.
+ const MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg());
+ if (isInductionOperation(DI, DefReg)) {
+ return MPhi;
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+/// getTripCount - Return a loop-invariant LLVM value indicating the
+/// number of times the loop will be executed. The trip count can
+/// be either a register or a constant value. If the trip-count
+/// cannot be determined, this returns null.
+///
+/// We find the trip count from the phi instruction that defines the
+/// induction variable. We follow the links to the CMP instruction
+/// to get the trip count.
+///
+/// Based upon getTripCount in LoopInfo.
+///
+CountValue *HexagonHardwareLoops::getTripCount(MachineLoop *L) const {
+ // Check that the loop has an induction variable.
+ const MachineInstr *IV_Inst = getCanonicalInductionVariable(L);
+ if (IV_Inst == 0) return 0;
+
+ // Canonical loops will end with a 'cmpeq_ri IV, Imm':
+ //   if Imm is 0, get the count from the PHI operand;
+ //   if Imm is -M, then M is the count;
+ //   otherwise, Imm is the count.
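+ //
+ // Illustrative MI pattern (virtual register names are made up):
+ //   %iv      = PHI %init, <preheader>, %iv.next, <latch>
+ //   %iv.next = ADD_ri %iv, -1
+ //   %p       = CMPEQri %iv.next, 0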
+ const MachineOperand *IV_Opnd;
+ const MachineOperand *InitialValue;
+ if (!L->contains(IV_Inst->getOperand(2).getMBB())) {
+ InitialValue = &IV_Inst->getOperand(1);
+ IV_Opnd = &IV_Inst->getOperand(3);
+ } else {
+ InitialValue = &IV_Inst->getOperand(3);
+ IV_Opnd = &IV_Inst->getOperand(1);
+ }
+
+ // Look for the cmp instruction to determine if we
+ // can get a useful trip count. The trip count can
+ // be either a register or an immediate. The location
+ // of the value depends upon the type (reg or imm).
+ while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) {
+ const MachineInstr *MI = IV_Opnd->getParent();
+ if (L->contains(MI) && isCompareEqualsImm(MI)) {
+ const MachineOperand &MO = MI->getOperand(2);
+ assert(MO.isImm() && "IV Cmp Operand should be an immediate");
+ int64_t ImmVal = MO.getImm();
+
+ const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg());
+ assert(L->contains(IV_DefInstr->getParent()) &&
+ "IV definition should occur in the loop");
+ int64_t iv_value = IV_DefInstr->getOperand(2).getImm();
+
+ if (ImmVal == 0) {
+ // Make sure the induction variable changes by one on each iteration.
+ if (iv_value != 1 && iv_value != -1) {
+ return 0;
+ }
+ return new CountValue(InitialValue->getReg(), iv_value > 0);
+ } else {
+ assert(InitialValue->isReg() && "Expecting register for init value");
+ const MachineInstr *DefInstr = MRI->getVRegDef(InitialValue->getReg());
+ if (DefInstr && DefInstr->getOpcode() == Hexagon::TFRI) {
+ int64_t count = ImmVal - DefInstr->getOperand(1).getImm();
+ if ((count % iv_value) != 0) {
+ return 0;
+ }
+ return new CountValue(count/iv_value);
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+/// isInductionOperation - Return true if the operation matches the
+/// pattern that defines an induction variable:
+/// add iv, c
+///
+bool
+HexagonHardwareLoops::isInductionOperation(const MachineInstr *MI,
+ unsigned IVReg) const {
+ return (MI->getOpcode() ==
+ Hexagon::ADD_ri && MI->getOperand(1).getReg() == IVReg);
+}
+
+/// isInvalidLoopOperation - Return true if the operation is invalid within a
+/// hardware loop.
+bool
+HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI) const {
+
+ // Calls are not allowed because the callee may use a hardware loop.
+ if (MI->getDesc().isCall()) {
+ return true;
+ }
+ // do not allow nested hardware loops
+ if (isHardwareLoop(MI)) {
+ return true;
+ }
+ // check if the instruction defines a hardware loop register
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() &&
+ (MO.getReg() == Hexagon::LC0 || MO.getReg() == Hexagon::LC1 ||
+ MO.getReg() == Hexagon::SA0 || MO.getReg() == Hexagon::SA1)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/// containsInvalidInstruction - Return true if the loop contains
+/// an instruction that inhibits the use of the hardware loop function.
+///
+bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const {
+ const std::vector<MachineBasicBlock*> Blocks = L->getBlocks();
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = Blocks[i];
+ for (MachineBasicBlock::iterator
+ MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
+ const MachineInstr *MI = &*MII;
+ if (isInvalidLoopOperation(MI)) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// convertToHardwareLoop - Check if the loop is a candidate for
+/// converting to a hardware loop. If so, then perform the
+/// transformation.
+///
+/// This function works on innermost loops first. A loop can
+/// be converted if it is a counting loop; either a register
+/// value or an immediate.
+///
+/// The code makes several assumptions about the representation
+/// of the loop in llvm.
+bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
+ bool Changed = false;
+ // Process nested loops first.
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
+ Changed |= convertToHardwareLoop(*I);
+ }
+ // If a nested loop has been converted, then we can't convert this loop.
+ if (Changed) {
+ return Changed;
+ }
+ // Are we able to determine the trip count for the loop?
+ CountValue *TripCount = getTripCount(L);
+ if (TripCount == 0) {
+ return false;
+ }
+ // Does the loop contain any invalid instructions?
+ if (containsInvalidInstruction(L)) {
+ return false;
+ }
+ MachineBasicBlock *Preheader = L->getLoopPreheader();
+ // No preheader means there's no place for the loop instruction.
+ if (Preheader == 0) {
+ return false;
+ }
+ MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
+
+ MachineBasicBlock *LastMBB = L->getExitingBlock();
+ // Don't generate hw loop if the loop has more than one exit.
+ if (LastMBB == 0) {
+ return false;
+ }
+ MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
+
+ // Determine the loop start.
+ MachineBasicBlock *LoopStart = L->getTopBlock();
+ if (L->getLoopLatch() != LastMBB) {
+ // When the exit and latch are not the same, use the latch block as the
+ // start. The loop start address is used only after the first iteration,
+ // and the loop latch may contain instructions that need to be executed
+ // after the first iteration.
+ LoopStart = L->getLoopLatch();
+ // Make sure the latch is a successor of the exit, otherwise it won't work.
+ if (!LastMBB->isSuccessor(LoopStart)) {
+ return false;
+ }
+ }
+
+ // Convert the loop to a hardware loop
+ DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
+
+ if (TripCount->isReg()) {
+ // Create a copy of the loop count register.
+ MachineFunction *MF = LastMBB->getParent();
+ const TargetRegisterClass *RC =
+ MF->getRegInfo().getRegClass(TripCount->getReg());
+ unsigned CountReg = MF->getRegInfo().createVirtualRegister(RC);
+ BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), CountReg).addReg(TripCount->getReg());
+ if (TripCount->isNeg()) {
+ unsigned CountReg1 = CountReg;
+ CountReg = MF->getRegInfo().createVirtualRegister(RC);
+ BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
+ TII->get(Hexagon::NEG), CountReg).addReg(CountReg1);
+ }
+
+ // Add the Loop instruction to the beginning of the loop.
+ BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
+ TII->get(Hexagon::LOOP0_r)).addMBB(LoopStart).addReg(CountReg);
+ } else {
+ assert(TripCount->isImm() && "Expecting immediate value for trip count");
+ // Add the Loop immediate instruction to the beginning of the loop.
+ int64_t CountImm = TripCount->getImm();
+ BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
+ TII->get(Hexagon::LOOP0_i)).addMBB(LoopStart).addImm(CountImm);
+ }
+
+ // Make sure the loop start always has a reference in the CFG. We need to
+ // create a BlockAddress operand to get this mechanism to work; both the
+ // MachineBasicBlock and BasicBlock objects need the flag set.
+ LoopStart->setHasAddressTaken();
+ // This line is needed to set the hasAddressTaken flag on the BasicBlock
+ // object.
+ BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
+
+ // Replace the loop branch with an endloop instruction.
+ DebugLoc dl = LastI->getDebugLoc();
+ BuildMI(*LastMBB, LastI, dl, TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart);
+
+ // The loop ends with either:
+ // - a conditional branch followed by an unconditional branch, or
+ // - a conditional branch to the loop start.
+ if (LastI->getOpcode() == Hexagon::JMP_Pred ||
+ LastI->getOpcode() == Hexagon::JMP_PredNot) {
+ // Delete one and change/add an unconditional branch out of the loop.
+ MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
+ LastI = LastMBB->erase(LastI);
+ if (!L->contains(BranchTarget)) {
+ if (LastI != LastMBB->end()) {
+ TII->RemoveBranch(*LastMBB);
+ }
+ SmallVector<MachineOperand, 0> Cond;
+ TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, dl);
+ }
+ } else {
+ // Conditional branch to loop start; just delete it.
+ LastMBB->erase(LastI);
+ }
+ delete TripCount;
+
+ ++NumHWLoops;
+ return true;
+}
+
+/// createHexagonFixupHwLoops - Factory for creating the hardware loop
+/// phase.
+FunctionPass *llvm::createHexagonFixupHwLoops() {
+ return new HexagonFixupHwLoops();
+}
+
+bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "****** Hexagon Hardware Loop Fixup ******\n");
+
+ bool Changed = fixupLoopInstrs(MF);
+ return Changed;
+}
+
+/// fixupLoopInstrs - For Hexagon, if the loop label is too far from the
+/// loop instruction then we need to set the LC0 and SA0 registers
+/// explicitly instead of using LOOP(start,count). This function
+/// checks the distance, and generates register assignments if needed.
+///
+/// This function makes two passes over the basic blocks. The first
+/// pass computes the offset of the basic block from the start.
+/// The second pass checks all the loop instructions.
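+///
+/// Instructions are assumed to be 4 bytes each, so a block's offset is
+/// estimated as 4 * (number of instructions preceding it); a LOOP whose
+/// start block is farther away than MAX_LOOP_DISTANCE is rewritten by
+/// convertLoopInstr into explicit transfers to SA0 and LC0.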
+bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
+
+ // Offset of the current instruction from the start.
+ unsigned InstOffset = 0;
+ // Map each basic block to the offset of its first instruction.
+ DenseMap<MachineBasicBlock*, unsigned> BlockToInstOffset;
+
+ // First pass - compute the offset of each basic block.
+ for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
+ MBB != MBBe; ++MBB) {
+ BlockToInstOffset[MBB] = InstOffset;
+ InstOffset += (MBB->size() * 4);
+ }
+
+ // Second pass - check each loop instruction to see if it needs to
+ // be converted.
+ InstOffset = 0;
+ bool Changed = false;
+ RegScavenger RS;
+
+ // Loop over all the basic blocks.
+ for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
+ MBB != MBBe; ++MBB) {
+ InstOffset = BlockToInstOffset[MBB];
+ RS.enterBasicBlock(MBB);
+
+ // Loop over all the instructions.
+ MachineBasicBlock::iterator MIE = MBB->end();
+ MachineBasicBlock::iterator MII = MBB->begin();
+ while (MII != MIE) {
+ if (isHardwareLoop(MII)) {
+ RS.forward(MII);
+ assert(MII->getOperand(0).isMBB() &&
+ "Expect a basic block as loop operand");
+ int diff = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()];
+ diff = (diff > 0 ? diff : -diff);
+ if ((unsigned)diff > MAX_LOOP_DISTANCE) {
+ // Convert to explicitly setting LC0 and SA0.
+ convertLoopInstr(MF, MII, RS);
+ MII = MBB->erase(MII);
+ Changed = true;
+ } else {
+ ++MII;
+ }
+ } else {
+ ++MII;
+ }
+ InstOffset += 4;
+ }
+ }
+
+ return Changed;
+}
+
+/// convertLoopInstr - Convert a loop instruction to a sequence of instructions
+/// that set the LC and SA registers explicitly.
+void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF,
+ MachineBasicBlock::iterator &MII,
+ RegScavenger &RS) {
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ MachineBasicBlock *MBB = MII->getParent();
+ DebugLoc DL = MII->getDebugLoc();
+ unsigned Scratch = RS.scavengeRegister(Hexagon::IntRegsRegisterClass, MII, 0);
+
+ // First, set the LC0 with the trip count.
+ if (MII->getOperand(1).isReg()) {
+ // Trip count is a register
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
+ .addReg(MII->getOperand(1).getReg());
+ } else {
+ // Trip count is an immediate.
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFRI), Scratch)
+ .addImm(MII->getOperand(1).getImm());
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
+ .addReg(Scratch);
+ }
+ // Then, set the SA0 with the loop start address.
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::CONST32_Label), Scratch)
+ .addMBB(MII->getOperand(0).getMBB());
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::SA0).addReg(Scratch);
+}
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
new file mode 100644
index 0000000..4deab9f
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -0,0 +1,1495 @@
+//==-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon ----==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the Hexagon target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-isel"
+#include "HexagonISelLowering.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===--------------------------------------------------------------------===//
+/// HexagonDAGToDAGISel - Hexagon specific code to select Hexagon machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+class HexagonDAGToDAGISel : public SelectionDAGISel {
+ /// Subtarget - Keep a pointer to the Hexagon Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const HexagonSubtarget &Subtarget;
+
+ // Keep a reference to HexagonTargetMachine.
+ HexagonTargetMachine& TM;
+ const HexagonInstrInfo *TII;
+
+public:
+ explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine)
+ : SelectionDAGISel(targetmachine),
+ Subtarget(targetmachine.getSubtarget<HexagonSubtarget>()),
+ TM(targetmachine),
+ TII(static_cast<const HexagonInstrInfo*>(TM.getInstrInfo())) {
+
+ }
+
+ SDNode *Select(SDNode *N);
+
+ // Complex Pattern Selectors.
+ bool SelectADDRri(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriS11_0(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriS11_1(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriS11_2(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectMEMriS11_2(SDValue& Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRriS11_3(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRrr(SDValue &Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRriU6_0(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriU6_1(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriU6_2(SDValue& N, SDValue &R1, SDValue &R2);
+
+ virtual const char *getPassName() const {
+ return "Hexagon DAG->DAG Pattern Instruction Selection";
+ }
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+ bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset);
+
+ SDNode *SelectLoad(SDNode *N);
+ SDNode *SelectBaseOffsetLoad(LoadSDNode *LD, DebugLoc dl);
+ SDNode *SelectIndexedLoad(LoadSDNode *LD, DebugLoc dl);
+ SDNode *SelectIndexedLoadZeroExtend64(LoadSDNode *LD, unsigned Opcode,
+ DebugLoc dl);
+ SDNode *SelectIndexedLoadSignExtend64(LoadSDNode *LD, unsigned Opcode,
+ DebugLoc dl);
+ SDNode *SelectBaseOffsetStore(StoreSDNode *ST, DebugLoc dl);
+ SDNode *SelectIndexedStore(StoreSDNode *ST, DebugLoc dl);
+ SDNode *SelectStore(SDNode *N);
+ SDNode *SelectSHL(SDNode *N);
+ SDNode *SelectSelect(SDNode *N);
+ SDNode *SelectTruncate(SDNode *N);
+ SDNode *SelectMul(SDNode *N);
+ SDNode *SelectZeroExtend(SDNode *N);
+ SDNode *SelectIntrinsicWOChain(SDNode *N);
+ SDNode *SelectConstant(SDNode *N);
+ SDNode *SelectAdd(SDNode *N);
+
+ // Include the pieces autogenerated from the target description.
+#include "HexagonGenDAGISel.inc"
+};
+} // end anonymous namespace
+
+
+/// createHexagonISelDag - This pass converts a legalized DAG into a
+/// Hexagon-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM) {
+ return new HexagonDAGToDAGISel(TM);
+}
+
+static bool IsS11_0_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // s11_0 predicate - True if the immediate fits in an 11-bit signed field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<11>(v);
+}
+
+
+static bool IsS11_1_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // s11_1 predicate - True if the immediate is a multiple of 2 and, after
+ // scaling, fits in an 11-bit signed field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,1>(v);
+}
+
+
+static bool IsS11_2_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // s11_2 predicate - True if the immediate is a multiple of 4 and, after
+ // scaling, fits in an 11-bit signed field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,2>(v);
+}
+
+
+static bool IsS11_3_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // s11_3 predicate - True if the immediate is a multiple of 8 and, after
+ // scaling, fits in an 11-bit signed field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,3>(v);
+}
+
+
+static bool IsU6_0_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // u6_0 predicate - True if the immediate fits in a 6-bit unsigned field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<6>(v);
+}
+
+
+static bool IsU6_1_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // u6_1 predicate - True if the immediate is a multiple of 2 and, after
+ // scaling, fits in a 6-bit unsigned field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,1>(v);
+}
+
+
+static bool IsU6_2_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // u6_2 predicate - True if the immediate is a multiple of 4 and, after
+ // scaling, fits in a 6-bit unsigned field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,2>(v);
+}
+
+
+// Intrinsics that return a predicate.
+static unsigned doesIntrinsicReturnPredicate(unsigned ID)
+{
+ switch (ID) {
+ default:
+ return 0;
+ case Intrinsic::hexagon_C2_cmpeq:
+ case Intrinsic::hexagon_C2_cmpgt:
+ case Intrinsic::hexagon_C2_cmpgtu:
+ case Intrinsic::hexagon_C2_cmpgtup:
+ case Intrinsic::hexagon_C2_cmpgtp:
+ case Intrinsic::hexagon_C2_cmpeqp:
+ case Intrinsic::hexagon_C2_bitsset:
+ case Intrinsic::hexagon_C2_bitsclr:
+ case Intrinsic::hexagon_C2_cmpeqi:
+ case Intrinsic::hexagon_C2_cmpgti:
+ case Intrinsic::hexagon_C2_cmpgtui:
+ case Intrinsic::hexagon_C2_cmpgei:
+ case Intrinsic::hexagon_C2_cmpgeui:
+ case Intrinsic::hexagon_C2_cmplt:
+ case Intrinsic::hexagon_C2_cmpltu:
+ case Intrinsic::hexagon_C2_bitsclri:
+ case Intrinsic::hexagon_C2_and:
+ case Intrinsic::hexagon_C2_or:
+ case Intrinsic::hexagon_C2_xor:
+ case Intrinsic::hexagon_C2_andn:
+ case Intrinsic::hexagon_C2_not:
+ case Intrinsic::hexagon_C2_orn:
+ case Intrinsic::hexagon_C2_pxfer_map:
+ case Intrinsic::hexagon_C2_any8:
+ case Intrinsic::hexagon_C2_all8:
+ case Intrinsic::hexagon_A2_vcmpbeq:
+ case Intrinsic::hexagon_A2_vcmpbgtu:
+ case Intrinsic::hexagon_A2_vcmpheq:
+ case Intrinsic::hexagon_A2_vcmphgt:
+ case Intrinsic::hexagon_A2_vcmphgtu:
+ case Intrinsic::hexagon_A2_vcmpweq:
+ case Intrinsic::hexagon_A2_vcmpwgt:
+ case Intrinsic::hexagon_A2_vcmpwgtu:
+ case Intrinsic::hexagon_C2_tfrrp:
+ case Intrinsic::hexagon_S2_tstbit_i:
+ case Intrinsic::hexagon_S2_tstbit_r:
+ return 1;
+ }
+}
+
+
+// Intrinsics that have predicate operands.
+static unsigned doesIntrinsicContainPredicate(unsigned ID)
+{
+ switch (ID) {
+ default:
+ return 0;
+ case Intrinsic::hexagon_C2_tfrpr:
+ return Hexagon::TFR_RsPd;
+ case Intrinsic::hexagon_C2_and:
+ return Hexagon::AND_pp;
+ case Intrinsic::hexagon_C2_xor:
+ return Hexagon::XOR_pp;
+ case Intrinsic::hexagon_C2_or:
+ return Hexagon::OR_pp;
+ case Intrinsic::hexagon_C2_not:
+ return Hexagon::NOT_pp;
+ case Intrinsic::hexagon_C2_any8:
+ return Hexagon::ANY_pp;
+ case Intrinsic::hexagon_C2_all8:
+ return Hexagon::ALL_pp;
+ case Intrinsic::hexagon_C2_vitpack:
+ return Hexagon::VITPACK_pp;
+ case Intrinsic::hexagon_C2_mask:
+ return Hexagon::MASK_p;
+ case Intrinsic::hexagon_C2_mux:
+ return Hexagon::MUX_rr;
+
+ // Mapping hexagon_C2_muxir to MUX_pri. This is pretty weird - but
+ // that's how it's mapped in q6protos.h.
+ case Intrinsic::hexagon_C2_muxir:
+ return Hexagon::MUX_ri;
+
+ // Mapping hexagon_C2_muxri to MUX_pir. This is pretty weird - but
+ // that's how it's mapped in q6protos.h.
+ case Intrinsic::hexagon_C2_muxri:
+ return Hexagon::MUX_ir;
+
+ case Intrinsic::hexagon_C2_muxii:
+ return Hexagon::MUX_ii;
+ case Intrinsic::hexagon_C2_vmux:
+ return Hexagon::VMUX_prr64;
+ case Intrinsic::hexagon_S2_valignrb:
+ return Hexagon::VALIGN_rrp;
+ case Intrinsic::hexagon_S2_vsplicerb:
+ return Hexagon::VSPLICE_rrp;
+ }
+}
+
+
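+// OffsetFitsS11 - True if Offset fits in a signed 11-bit field scaled by the
+// access size. For example, an i32 access accepts multiples of 4 in the range
+// [-4096, 4092] (a schematic reading of the checks below).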
+static bool OffsetFitsS11(EVT MemType, int64_t Offset) {
+ if (MemType == MVT::i64 && isShiftedInt<11,3>(Offset)) {
+ return true;
+ }
+ if (MemType == MVT::i32 && isShiftedInt<11,2>(Offset)) {
+ return true;
+ }
+ if (MemType == MVT::i16 && isShiftedInt<11,1>(Offset)) {
+ return true;
+ }
+ if (MemType == MVT::i8 && isInt<11>(Offset)) {
+ return true;
+ }
+ return false;
+}
+
+
+//
+// Try to lower loads of GlobalAddresses into base+offset loads. Custom
+// lowering for GlobalAddress nodes has already turned it into a
+// CONST32.
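+//
+// Schematically (names are illustrative), a load of (CONST32 @g + #off) with
+// an in-range offset becomes:
+//   Rbase = CONST32_set @g
+//   Rdst  = LDri{b,h,w,d}_indexed Rbase, #off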
+//
+SDNode *HexagonDAGToDAGISel::SelectBaseOffsetLoad(LoadSDNode *LD, DebugLoc dl) {
+ EVT LoadedVT = LD->getMemoryVT();
+ SDValue Chain = LD->getChain();
+ SDNode* Const32 = LD->getBasePtr().getNode();
+ unsigned Opcode = 0;
+
+ if (Const32->getOpcode() == HexagonISD::CONST32 &&
+ ISD::isNormalLoad(LD)) {
+ SDValue Base = Const32->getOperand(0);
+ EVT LoadedVT = LD->getMemoryVT();
+ int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset();
+ if (Offset != 0 && OffsetFitsS11(LoadedVT, Offset)) {
+ MVT PointerTy = TLI.getPointerTy();
+ const GlobalValue* GV =
+ cast<GlobalAddressSDNode>(Base)->getGlobal();
+ SDValue TargAddr =
+ CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0);
+ SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set,
+ dl, PointerTy,
+ TargAddr);
+ // Figure out base + offset opcode
+ if (LoadedVT == MVT::i64) Opcode = Hexagon::LDrid_indexed;
+ else if (LoadedVT == MVT::i32) Opcode = Hexagon::LDriw_indexed;
+ else if (LoadedVT == MVT::i16) Opcode = Hexagon::LDrih_indexed;
+ else if (LoadedVT == MVT::i8) Opcode = Hexagon::LDrib_indexed;
+ else assert (0 && "unknown memory type");
+
+ // Build indexed load.
+ SDValue TargetConstOff = CurDAG->getTargetConstant(Offset, PointerTy);
+ SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
+ LD->getValueType(0),
+ MVT::Other,
+ SDValue(NewBase,0),
+ TargetConstOff,
+ Chain);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ ReplaceUses(LD, Result);
+ return Result;
+ }
+ }
+
+ return SelectCode(LD);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD,
+ unsigned Opcode,
+ DebugLoc dl)
+{
+ SDValue Chain = LD->getChain();
+ EVT LoadedVT = LD->getMemoryVT();
+ SDValue Base = LD->getBasePtr();
+ SDValue Offset = LD->getOffset();
+ SDNode *OffsetNode = Offset.getNode();
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+ SDValue N1 = LD->getOperand(1);
+ SDValue CPTmpN1_0;
+ SDValue CPTmpN1_1;
+ if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) &&
+ N1.getNode()->getValueType(0) == MVT::i32) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val)) {
+ SDValue TargetConst = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32,
+ MVT::Other, Base, TargetConst,
+ Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::SXTW, dl, MVT::i64,
+ SDValue(Result_1, 0));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_2, 0),
+ SDValue(Result_1, 1),
+ SDValue(Result_1, 2)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_2;
+ }
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::Other, Base, TargetConst0,
+ Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::SXTW, dl,
+ MVT::i64, SDValue(Result_1, 0));
+ SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl,
+ MVT::i32, Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_2, 0),
+ SDValue(Result_3, 0),
+ SDValue(Result_1, 1)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_2;
+ }
+ return SelectCode(LD);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD,
+ unsigned Opcode,
+ DebugLoc dl)
+{
+ SDValue Chain = LD->getChain();
+ EVT LoadedVT = LD->getMemoryVT();
+ SDValue Base = LD->getBasePtr();
+ SDValue Offset = LD->getOffset();
+ SDNode *OffsetNode = Offset.getNode();
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+ SDValue N1 = LD->getOperand(1);
+ SDValue CPTmpN1_0;
+ SDValue CPTmpN1_1;
+ if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) &&
+ N1.getNode()->getValueType(0) == MVT::i32) {
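+    // Post-increment form: load the low 32 bits, materialize a zero with
+    // TFRI, and COMBINE the two into the 64-bit zero-extended result.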
+ if (TII->isValidAutoIncImm(LoadedVT, Val)) {
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::i32, MVT::Other, Base,
+ TargetConstVal, Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32,
+ TargetConst0);
+ SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl,
+ MVT::i64, MVT::Other,
+ SDValue(Result_2,0),
+ SDValue(Result_1,0));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_3, 0),
+ SDValue(Result_1, 1),
+ SDValue(Result_1, 2)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_3;
+ }
+
+ // Generate an indirect load.
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::Other,
+ Base, TargetConst0, Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32,
+ TargetConst0);
+ SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl,
+ MVT::i64, MVT::Other,
+ SDValue(Result_2,0),
+ SDValue(Result_1,0));
+ // Add offset to base.
+ SDNode* Result_4 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
+ Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_3, 0), // Load value.
+ SDValue(Result_4, 0), // New address.
+ SDValue(Result_1, 1)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_3;
+ }
+
+ return SelectCode(LD);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, DebugLoc dl) {
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Offset = LD->getOffset();
+ SDNode *OffsetNode = Offset.getNode();
+ // Get the constant value.
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+ EVT LoadedVT = LD->getMemoryVT();
+ unsigned Opcode = 0;
+
+ // Check for zero ext loads.
+ bool zextval = (LD->getExtensionType() == ISD::ZEXTLOAD);
+
+ // Figure out the opcode.
+ if (LoadedVT == MVT::i64) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val))
+ Opcode = Hexagon::POST_LDrid;
+ else
+ Opcode = Hexagon::LDrid;
+ } else if (LoadedVT == MVT::i32) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val))
+ Opcode = Hexagon::POST_LDriw;
+ else
+ Opcode = Hexagon::LDriw;
+ } else if (LoadedVT == MVT::i16) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val))
+ Opcode = zextval ? Hexagon::POST_LDriuh : Hexagon::POST_LDrih;
+ else
+ Opcode = zextval ? Hexagon::LDriuh : Hexagon::LDrih;
+ } else if (LoadedVT == MVT::i8) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val))
+ Opcode = zextval ? Hexagon::POST_LDriub : Hexagon::POST_LDrib;
+ else
+ Opcode = zextval ? Hexagon::LDriub : Hexagon::LDrib;
+ } else
+ assert (0 && "unknown memory type");
+
+ // For zero ext i64 loads, we need to add combine instructions.
+ if (LD->getValueType(0) == MVT::i64 &&
+ LD->getExtensionType() == ISD::ZEXTLOAD) {
+ return SelectIndexedLoadZeroExtend64(LD, Opcode, dl);
+ }
+ if (LD->getValueType(0) == MVT::i64 &&
+ LD->getExtensionType() == ISD::SEXTLOAD) {
+ // Handle sign ext i64 loads.
+ return SelectIndexedLoadSignExtend64(LD, Opcode, dl);
+ }
+ if (TII->isValidAutoIncImm(LoadedVT, Val)) {
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
+ LD->getValueType(0),
+ MVT::i32, MVT::Other, Base,
+ TargetConstVal, Chain);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result, 0),
+ SDValue(Result, 1),
+ SDValue(Result, 2)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result;
+ } else {
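+    // The increment does not fit the auto-increment form: load with a zero
+    // offset and compute the post-incremented address with a separate ADD_ri.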
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl,
+ LD->getValueType(0),
+ MVT::Other, Base, TargetConst0,
+ Chain);
+ SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
+ Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_1, 0),
+ SDValue(Result_2, 0),
+ SDValue(Result_1, 1)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_1;
+ }
+
+ return SelectCode(LD);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectLoad(SDNode *N) {
+ SDNode *result;
+ DebugLoc dl = N->getDebugLoc();
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+
+ // Handle indexed loads.
+ if (AM != ISD::UNINDEXED) {
+ result = SelectIndexedLoad(LD, dl);
+ } else {
+ result = SelectBaseOffsetLoad(LD, dl);
+ }
+
+ return result;
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, DebugLoc dl) {
+ SDValue Chain = ST->getChain();
+ SDValue Base = ST->getBasePtr();
+ SDValue Offset = ST->getOffset();
+ SDValue Value = ST->getValue();
+ SDNode *OffsetNode = Offset.getNode();
+ // Get the constant value.
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+ EVT StoredVT = ST->getMemoryVT();
+
+ // Offset value must be within representable range
+ // and must have correct alignment properties.
+ if (TII->isValidAutoIncImm(StoredVT, Val)) {
+ SDValue Ops[] = { Value, Base,
+ CurDAG->getTargetConstant(Val, MVT::i32), Chain};
+ unsigned Opcode = 0;
+
+ // Figure out the post inc version of opcode.
+ if (StoredVT == MVT::i64) Opcode = Hexagon::POST_STdri;
+ else if (StoredVT == MVT::i32) Opcode = Hexagon::POST_STwri;
+ else if (StoredVT == MVT::i16) Opcode = Hexagon::POST_SThri;
+ else if (StoredVT == MVT::i8) Opcode = Hexagon::POST_STbri;
+ else assert (0 && "unknown memory type");
+
+ // Build post increment store.
+ SDNode* Result = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::Other, Ops, 4);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = ST->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+
+ ReplaceUses(ST, Result);
+ ReplaceUses(SDValue(ST,1), SDValue(Result,1));
+ return Result;
+ }
+
+ // Note: Order of operands matches the def of instruction:
+ // def STrid : STInst<(outs), (ins MEMri:$addr, DoubleRegs:$src1), ...
+ // and it differs for POST_ST* for instance.
+ SDValue Ops[] = { Base, CurDAG->getTargetConstant(0, MVT::i32), Value,
+ Chain};
+ unsigned Opcode = 0;
+
+ // Figure out the opcode.
+ if (StoredVT == MVT::i64) Opcode = Hexagon::STrid;
+ else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw;
+ else if (StoredVT == MVT::i16) Opcode = Hexagon::STrih;
+ else if (StoredVT == MVT::i8) Opcode = Hexagon::STrib;
+ else assert (0 && "unknown memory type");
+
+ // Build regular store.
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops,
+ 4);
+  // Build the split increment instruction.
+ SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
+ Base,
+ TargetConstVal,
+ SDValue(Result_1, 0));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = ST->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+
+ ReplaceUses(SDValue(ST,0), SDValue(Result_2,0));
+ ReplaceUses(SDValue(ST,1), SDValue(Result_1,0));
+ return Result_2;
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectBaseOffsetStore(StoreSDNode *ST,
+ DebugLoc dl) {
+ SDValue Chain = ST->getChain();
+ SDNode* Const32 = ST->getBasePtr().getNode();
+ SDValue Value = ST->getValue();
+ unsigned Opcode = 0;
+
+  // Try to lower stores of GlobalAddresses into indexed stores.  Custom
+  // lowering for GlobalAddress nodes has already turned them into a
+  // CONST32.  Avoid truncating stores for the moment; post-increment stores
+  // do the same.  There is no obvious reason for this restriction, so a bug
+  // will be filed to revisit it.
+ if ((Const32->getOpcode() == HexagonISD::CONST32) &&
+ !(Value.getValueType() == MVT::i64 && ST->isTruncatingStore())) {
+ SDValue Base = Const32->getOperand(0);
+ if (Base.getOpcode() == ISD::TargetGlobalAddress) {
+ EVT StoredVT = ST->getMemoryVT();
+ int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset();
+ if (Offset != 0 && OffsetFitsS11(StoredVT, Offset)) {
+ MVT PointerTy = TLI.getPointerTy();
+ const GlobalValue* GV =
+ cast<GlobalAddressSDNode>(Base)->getGlobal();
+ SDValue TargAddr =
+ CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0);
+ SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set,
+ dl, PointerTy,
+ TargAddr);
+
+ // Figure out base + offset opcode
+ if (StoredVT == MVT::i64) Opcode = Hexagon::STrid_indexed;
+ else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw_indexed;
+ else if (StoredVT == MVT::i16) Opcode = Hexagon::STrih_indexed;
+ else if (StoredVT == MVT::i8) Opcode = Hexagon::STrib_indexed;
+ else assert (0 && "unknown memory type");
+
+ SDValue Ops[] = {SDValue(NewBase,0),
+ CurDAG->getTargetConstant(Offset,PointerTy),
+ Value, Chain};
+ // build indexed store
+ SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
+ MVT::Other, Ops, 4);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = ST->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ ReplaceUses(ST, Result);
+ return Result;
+ }
+ }
+ }
+
+ return SelectCode(ST);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ ISD::MemIndexedMode AM = ST->getAddressingMode();
+
+ // Handle indexed stores.
+ if (AM != ISD::UNINDEXED) {
+ return SelectIndexedStore(ST, dl);
+ }
+
+ return SelectBaseOffsetStore(ST, dl);
+}
+
+SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+
+ //
+ // %conv.i = sext i32 %tmp1 to i64
+ // %conv2.i = sext i32 %add to i64
+ // %mul.i = mul nsw i64 %conv2.i, %conv.i
+ //
+ // --- match with the following ---
+ //
+ // %mul.i = mpy (%tmp1, %add)
+ //
+
+ if (N->getValueType(0) == MVT::i64) {
+ // Shifting a i64 signed multiply.
+ SDValue MulOp0 = N->getOperand(0);
+ SDValue MulOp1 = N->getOperand(1);
+
+ SDValue OP0;
+ SDValue OP1;
+
+ // Handle sign_extend and sextload.
+ if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Sext0 = MulOp0.getOperand(0);
+ if (Sext0.getNode()->getValueType(0) != MVT::i32) {
+        return SelectCode(N);
+ }
+
+ OP0 = Sext0;
+ } else if (MulOp0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode());
+ if (LD->getMemoryVT() != MVT::i32 ||
+ LD->getExtensionType() != ISD::SEXTLOAD ||
+ LD->getAddressingMode() != ISD::UNINDEXED) {
+        return SelectCode(N);
+ }
+
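+      // Re-issue the sign-extending load as a plain 32-bit LDriw; the MPY64
+      // emitted below consumes the 32-bit value directly.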
+ SDValue Base = LD->getBasePtr();
+ SDValue Chain = LD->getChain();
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ OP0 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+ MVT::Other,
+ LD->getBasePtr(), TargetConst0,
+ Chain), 0);
+ } else {
+ return SelectCode(N);
+ }
+
+ // Same goes for the second operand.
+ if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Sext1 = MulOp1.getOperand(0);
+ if (Sext1.getNode()->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+
+ OP1 = Sext1;
+ } else if (MulOp1.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode());
+ if (LD->getMemoryVT() != MVT::i32 ||
+ LD->getExtensionType() != ISD::SEXTLOAD ||
+ LD->getAddressingMode() != ISD::UNINDEXED) {
+ return SelectCode(N);
+ }
+
+ SDValue Base = LD->getBasePtr();
+ SDValue Chain = LD->getChain();
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ OP1 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+ MVT::Other,
+ LD->getBasePtr(), TargetConst0,
+ Chain), 0);
+ } else {
+ return SelectCode(N);
+ }
+
+ // Generate a mpy instruction.
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::MPY64, dl, MVT::i64,
+ OP0, OP1);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+
+ return SelectCode(N);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectSelect(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue N0 = N->getOperand(0);
+ if (N0.getOpcode() == ISD::SETCC) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ SDValue N000 = N00.getOperand(0);
+ SDValue N001 = N00.getOperand(1);
+ if (cast<VTSDNode>(N001)->getVT() == MVT::i16) {
+ SDValue N01 = N0.getOperand(1);
+ SDValue N02 = N0.getOperand(2);
+
+ // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2,
+ // i16:Other),IntRegs:i32:$src1, SETLT:Other),IntRegs:i32:$src1,
+ // IntRegs:i32:$src2)
+ // Emits: (MAXh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2)
+ // Pattern complexity = 9 cost = 1 size = 0.
+ if (cast<CondCodeSDNode>(N02)->get() == ISD::SETLT) {
+ SDValue N1 = N->getOperand(1);
+ if (N01 == N1) {
+ SDValue N2 = N->getOperand(2);
+ if (N000 == N2 &&
+ N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 &&
+ N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) {
+ SDNode *SextNode = CurDAG->getMachineNode(Hexagon::SXTH, dl,
+ MVT::i32, N000);
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::MAXw_rr, dl,
+ MVT::i32,
+ SDValue(SextNode, 0),
+ N1);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+ }
+
+ // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2,
+ // i16:Other), IntRegs:i32:$src1, SETGT:Other), IntRegs:i32:$src1,
+ // IntRegs:i32:$src2)
+ // Emits: (MINh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2)
+ // Pattern complexity = 9 cost = 1 size = 0.
+ if (cast<CondCodeSDNode>(N02)->get() == ISD::SETGT) {
+ SDValue N1 = N->getOperand(1);
+ if (N01 == N1) {
+ SDValue N2 = N->getOperand(2);
+ if (N000 == N2 &&
+ N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 &&
+ N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) {
+ SDNode *SextNode = CurDAG->getMachineNode(Hexagon::SXTH, dl,
+ MVT::i32, N000);
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::MINw_rr, dl,
+ MVT::i32,
+ SDValue(SextNode, 0),
+ N1);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return SelectCode(N);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectTruncate(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Shift = N->getOperand(0);
+
+ //
+ // %conv.i = sext i32 %tmp1 to i64
+ // %conv2.i = sext i32 %add to i64
+ // %mul.i = mul nsw i64 %conv2.i, %conv.i
+ // %shr5.i = lshr i64 %mul.i, 32
+ // %conv3.i = trunc i64 %shr5.i to i32
+ //
+ // --- match with the following ---
+ //
+ // %conv3.i = mpy (%tmp1, %add)
+ //
+ // Trunc to i32.
+ if (N->getValueType(0) == MVT::i32) {
+ // Trunc from i64.
+ if (Shift.getNode()->getValueType(0) == MVT::i64) {
+ // Trunc child is logical shift right.
+ if (Shift.getOpcode() != ISD::SRL) {
+ return SelectCode(N);
+ }
+
+ SDValue ShiftOp0 = Shift.getOperand(0);
+ SDValue ShiftOp1 = Shift.getOperand(1);
+
+ // Shift by const 32
+ if (ShiftOp1.getOpcode() != ISD::Constant) {
+ return SelectCode(N);
+ }
+
+ int32_t ShiftConst =
+ cast<ConstantSDNode>(ShiftOp1.getNode())->getSExtValue();
+ if (ShiftConst != 32) {
+ return SelectCode(N);
+ }
+
+ // Shifting a i64 signed multiply
+ SDValue Mul = ShiftOp0;
+ if (Mul.getOpcode() != ISD::MUL) {
+ return SelectCode(N);
+ }
+
+ SDValue MulOp0 = Mul.getOperand(0);
+ SDValue MulOp1 = Mul.getOperand(1);
+
+ SDValue OP0;
+ SDValue OP1;
+
+ // Handle sign_extend and sextload
+ if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Sext0 = MulOp0.getOperand(0);
+ if (Sext0.getNode()->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+
+ OP0 = Sext0;
+ } else if (MulOp0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode());
+ if (LD->getMemoryVT() != MVT::i32 ||
+ LD->getExtensionType() != ISD::SEXTLOAD ||
+ LD->getAddressingMode() != ISD::UNINDEXED) {
+ return SelectCode(N);
+ }
+
+ SDValue Base = LD->getBasePtr();
+ SDValue Chain = LD->getChain();
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ OP0 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+ MVT::Other,
+ LD->getBasePtr(),
+ TargetConst0, Chain), 0);
+ } else {
+ return SelectCode(N);
+ }
+
+ // Same goes for the second operand.
+ if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Sext1 = MulOp1.getOperand(0);
+ if (Sext1.getNode()->getValueType(0) != MVT::i32)
+ return SelectCode(N);
+
+ OP1 = Sext1;
+ } else if (MulOp1.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode());
+ if (LD->getMemoryVT() != MVT::i32 ||
+ LD->getExtensionType() != ISD::SEXTLOAD ||
+ LD->getAddressingMode() != ISD::UNINDEXED) {
+ return SelectCode(N);
+ }
+
+ SDValue Base = LD->getBasePtr();
+ SDValue Chain = LD->getChain();
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ OP1 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+ MVT::Other,
+ LD->getBasePtr(),
+ TargetConst0, Chain), 0);
+ } else {
+ return SelectCode(N);
+ }
+
+ // Generate a mpy instruction.
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::MPY, dl, MVT::i32,
+ OP0, OP1);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+
+ return SelectCode(N);
+}
+
+
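+//
+// Fold a shift of a constant multiply into a single MPYI_ri when the scaled
+// constant still fits in a signed 9-bit immediate:
+//   (x * c) << s        -> x * (c << s)
+//   (0 - (x << c)) << s -> x * -(1 << (c + s))
+//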
+SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getValueType(0) == MVT::i32) {
+ SDValue Shl_0 = N->getOperand(0);
+ SDValue Shl_1 = N->getOperand(1);
+ // RHS is const.
+ if (Shl_1.getOpcode() == ISD::Constant) {
+ if (Shl_0.getOpcode() == ISD::MUL) {
+ SDValue Mul_0 = Shl_0.getOperand(0); // Val
+ SDValue Mul_1 = Shl_0.getOperand(1); // Const
+ // RHS of mul is const.
+ if (Mul_1.getOpcode() == ISD::Constant) {
+ int32_t ShlConst =
+ cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue();
+ int32_t MulConst =
+ cast<ConstantSDNode>(Mul_1.getNode())->getSExtValue();
+ int32_t ValConst = MulConst << ShlConst;
+ SDValue Val = CurDAG->getTargetConstant(ValConst,
+ MVT::i32);
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val.getNode()))
+ if (isInt<9>(CN->getSExtValue())) {
+ SDNode* Result =
+ CurDAG->getMachineNode(Hexagon::MPYI_ri, dl,
+ MVT::i32, Mul_0, Val);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+
+ }
+ } else if (Shl_0.getOpcode() == ISD::SUB) {
+ SDValue Sub_0 = Shl_0.getOperand(0); // Const 0
+ SDValue Sub_1 = Shl_0.getOperand(1); // Val
+ if (Sub_0.getOpcode() == ISD::Constant) {
+ int32_t SubConst =
+ cast<ConstantSDNode>(Sub_0.getNode())->getSExtValue();
+ if (SubConst == 0) {
+ if (Sub_1.getOpcode() == ISD::SHL) {
+ SDValue Shl2_0 = Sub_1.getOperand(0); // Val
+ SDValue Shl2_1 = Sub_1.getOperand(1); // Const
+ if (Shl2_1.getOpcode() == ISD::Constant) {
+ int32_t ShlConst =
+ cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue();
+ int32_t Shl2Const =
+ cast<ConstantSDNode>(Shl2_1.getNode())->getSExtValue();
+ int32_t ValConst = 1 << (ShlConst+Shl2Const);
+ SDValue Val = CurDAG->getTargetConstant(-ValConst, MVT::i32);
+ if (ConstantSDNode *CN =
+ dyn_cast<ConstantSDNode>(Val.getNode()))
+ if (isInt<9>(CN->getSExtValue())) {
+ SDNode* Result =
+ CurDAG->getMachineNode(Hexagon::MPYI_ri, dl, MVT::i32,
+ Shl2_0, Val);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ return SelectCode(N);
+}
+
+
+//
+// If a zero_extend follows an intrinsic in the DAG (which means the result
+// of the intrinsic is a predicate), convert the zero_extend into a transfer
+// instruction.
+//
+// The zero_extend -> transfer lowering happens here. Otherwise the
+// zero_extend would be converted into a MUX, because predicate registers are
+// defined as 1 bit in the compiler while the architecture defines them as
+// 8-bit registers. We want to preserve all of the lower 8 bits, not just the
+// LSB.
+//
+SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDNode *IsIntrinsic = N->getOperand(0).getNode();
+ if ((IsIntrinsic->getOpcode() == ISD::INTRINSIC_WO_CHAIN)) {
+ unsigned ID =
+ cast<ConstantSDNode>(IsIntrinsic->getOperand(0))->getZExtValue();
+ if (doesIntrinsicReturnPredicate(ID)) {
+ // Now we need to differentiate target data types.
+ if (N->getValueType(0) == MVT::i64) {
+ // Convert the zero_extend to Rs = Pd followed by COMBINE_rr(0,Rs).
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::TFR_RsPd, dl,
+ MVT::i32,
+ SDValue(IsIntrinsic, 0));
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl,
+ MVT::i32,
+ TargetConst0);
+ SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl,
+ MVT::i64, MVT::Other,
+ SDValue(Result_2, 0),
+ SDValue(Result_1, 0));
+ ReplaceUses(N, Result_3);
+ return Result_3;
+ }
+ if (N->getValueType(0) == MVT::i32) {
+ // Convert the zero_extend to Rs = Pd
+ SDNode* RsPd = CurDAG->getMachineNode(Hexagon::TFR_RsPd, dl,
+ MVT::i32,
+ SDValue(IsIntrinsic, 0));
+ ReplaceUses(N, RsPd);
+ return RsPd;
+ }
+ assert(0 && "Unexpected value type");
+ }
+ }
+ return SelectCode(N);
+}
+
+
+//
+// Checking for intrinsics which have predicate registers as operand(s)
+// and lowering to the actual intrinsic.
+//
+SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ unsigned ID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned IntrinsicWithPred = doesIntrinsicContainPredicate(ID);
+
+ // We are concerned with only those intrinsics that have predicate registers
+ // as at least one of the operands.
+ if (IntrinsicWithPred) {
+ SmallVector<SDValue, 8> Ops;
+ const MCInstrDesc &MCID = TII->get(IntrinsicWithPred);
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+    // Iterate over all the operands of the intrinsic.
+    // For PredRegs, do the transfer.
+    // For Double/Int Regs, just preserve the value.
+    // For immediates, lower them.
+ for (unsigned i = 1; i < N->getNumOperands(); ++i) {
+ SDNode *Arg = N->getOperand(i).getNode();
+ const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI);
+
+ if (RC == Hexagon::IntRegsRegisterClass ||
+ RC == Hexagon::DoubleRegsRegisterClass) {
+ Ops.push_back(SDValue(Arg, 0));
+ } else if (RC == Hexagon::PredRegsRegisterClass) {
+ // Do the transfer.
+ SDNode *PdRs = CurDAG->getMachineNode(Hexagon::TFR_PdRs, dl, MVT::i1,
+ SDValue(Arg, 0));
+ Ops.push_back(SDValue(PdRs,0));
+ } else if (RC == NULL && (dyn_cast<ConstantSDNode>(Arg) != NULL)) {
+        // This is an immediate operand. Lower it here, making sure that we
+        // do have a ConstantSDNode for the immediate value.
+ int32_t Val = cast<ConstantSDNode>(Arg)->getSExtValue();
+ SDValue SDVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ Ops.push_back(SDVal);
+ } else {
+ assert(0 && "Unimplemented");
+ }
+ }
+ EVT ReturnValueVT = N->getValueType(0);
+ SDNode *Result = CurDAG->getMachineNode(IntrinsicWithPred, dl,
+ ReturnValueVT,
+ Ops.data(), Ops.size());
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ return SelectCode(N);
+}
+
+
+//
+// Map predicate true (encoded as -1 in LLVM) to a XOR.
+//
+SDNode *HexagonDAGToDAGISel::SelectConstant(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getValueType(0) == MVT::i1) {
+ SDNode* Result;
+ int32_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+ if (Val == -1) {
+ unsigned NewIntReg = TM.getInstrInfo()->createVR(MF, MVT(MVT::i32));
+ SDValue Reg = CurDAG->getRegister(NewIntReg, MVT::i32);
+
+      // Create the IntReg = 0 node.
+ SDNode* IntRegTFR =
+ CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32,
+ CurDAG->getTargetConstant(0, MVT::i32));
+
+ // Pd = IntReg
+ SDNode* Pd = CurDAG->getMachineNode(Hexagon::TFR_PdRs, dl, MVT::i1,
+ SDValue(IntRegTFR, 0));
+
+ // not(Pd)
+ SDNode* NotPd = CurDAG->getMachineNode(Hexagon::NOT_pp, dl, MVT::i1,
+ SDValue(Pd, 0));
+
+ // xor(not(Pd))
+ Result = CurDAG->getMachineNode(Hexagon::XOR_pp, dl, MVT::i1,
+ SDValue(Pd, 0), SDValue(NotPd, 0));
+
+      // We have just built:
+      //   Rs = 0
+      //   Pd = Rs
+      //   Pd = xor(not(Pd), Pd)
+
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+
+ return SelectCode(N);
+}
+
+
+//
+// Map add followed by a asr -> asr +=.
+//
+SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+ // Identify nodes of the form: add(asr(...)).
+ SDNode* Src1 = N->getOperand(0).getNode();
+ if (Src1->getOpcode() != ISD::SRA || !Src1->hasOneUse()
+ || Src1->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+
+ // Build Rd = Rd' + asr(Rs, Rt). The machine constraints will ensure that
+ // Rd and Rd' are assigned to the same register
+ SDNode* Result = CurDAG->getMachineNode(Hexagon::ASR_rr_acc, dl, MVT::i32,
+ N->getOperand(1),
+ Src1->getOperand(0),
+ Src1->getOperand(1));
+ ReplaceUses(N, Result);
+
+ return Result;
+}
+
+
+SDNode *HexagonDAGToDAGISel::Select(SDNode *N) {
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+
+ switch (N->getOpcode()) {
+ case ISD::Constant:
+ return SelectConstant(N);
+
+ case ISD::ADD:
+ return SelectAdd(N);
+
+ case ISD::SHL:
+ return SelectSHL(N);
+
+ case ISD::LOAD:
+ return SelectLoad(N);
+
+ case ISD::STORE:
+ return SelectStore(N);
+
+ case ISD::SELECT:
+ return SelectSelect(N);
+
+ case ISD::TRUNCATE:
+ return SelectTruncate(N);
+
+ case ISD::MUL:
+ return SelectMul(N);
+
+ case ISD::ZERO_EXTEND:
+ return SelectZeroExtend(N);
+
+ case ISD::INTRINSIC_WO_CHAIN:
+ return SelectIntrinsicWOChain(N);
+ }
+
+ return SelectCode(N);
+}
+
+
+//
+// Hexagon_TODO: Five functions for ADDRri?! Surely there must be a better way
+// to define these instructions.
+//
+bool HexagonDAGToDAGISel::SelectADDRri(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriS11_0(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_0_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_0_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriS11_1(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_1_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_1_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriS11_2(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_2_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_2_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriU6_0(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_0_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_0_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriU6_1(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_1_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_1_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriU6_2(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_2_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_2_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectMEMriS11_2(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+
+ if (Addr.getOpcode() != ISD::ADD) {
+ return(SelectADDRriS11_2(Addr, Base, Offset));
+ }
+
+ return SelectADDRriS11_2(Addr, Base, Offset);
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriS11_3(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_3_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_3_Offset(Offset.getNode()));
+}
+
+bool HexagonDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1,
+ SDValue &R2) {
+ if (Addr.getOpcode() == ISD::FrameIndex) return false;
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ if (isInt<13>(CN->getSExtValue()))
+ return false; // Let the reg+imm pattern catch this!
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ return true;
+ }
+
+ R1 = Addr;
+
+ return true;
+}
+
+
+// Handle the generic address case. It is used for inline asm =m constraints,
+// which could have any kind of pointer.
+bool HexagonDAGToDAGISel::SelectAddr(SDNode *Op, SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+
+bool HexagonDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+
+ switch (ConstraintCode) {
+ case 'o': // Offsetable.
+ case 'v': // Not offsetable.
+ default: return true;
+ case 'm': // Memory.
+ if (!SelectAddr(Op.getNode(), Op, Op0, Op1))
+ return true;
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+}
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
new file mode 100644
index 0000000..0ac3cf0
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -0,0 +1,1505 @@
+//===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that Hexagon uses to lower LLVM code
+// into a selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonISelLowering.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonTargetObjectFile.h"
+#include "HexagonSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "llvm/Support/CommandLine.h"
+
+const unsigned Hexagon_MAX_RET_SIZE = 64;
+using namespace llvm;
+
+static cl::opt<bool>
+EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden,
+ cl::desc("Control jump table emission on Hexagon target"));
+
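+// Number of named (fixed) parameters of the callee of the call currently
+// being lowered; -1 means the callee is not a known vararg function. Set in
+// LowerCall and consumed by CC_Hexagon_VarArg.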
+int NumNamedVarArgParams = -1;
+
+// Implement calling convention for Hexagon.
+static bool
+CC_Hexagon(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+CC_Hexagon32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+CC_Hexagon64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+RetCC_Hexagon(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+RetCC_Hexagon32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ // NumNamedVarArgParams can not be zero for a VarArg function.
+ assert ( (NumNamedVarArgParams > 0) &&
+ "NumNamedVarArgParams is not bigger than zero.");
+
+ if ( (int)ValNo < NumNamedVarArgParams ) {
+ // Deal with named arguments.
+ return CC_Hexagon(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State);
+ }
+
+ // Deal with un-named arguments.
+ unsigned ofst;
+ if (ArgFlags.isByVal()) {
+ // If pass-by-value, the size allocated on stack is decided
+ // by ArgFlags.getByValSize(), not by the size of LocVT.
+ assert ((ArgFlags.getByValSize() > 8) &&
+ "ByValSize must be bigger than 8 bytes");
+ ofst = State.AllocateStack(ArgFlags.getByValSize(), 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ if (LocVT == MVT::i32) {
+ ofst = State.AllocateStack(4, 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ if (LocVT == MVT::i64) {
+ ofst = State.AllocateStack(8, 8);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ llvm_unreachable(0);
+
+ return true;
+}
+
+
+static bool
+CC_Hexagon (unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ if (ArgFlags.isByVal()) {
+ // Passed on stack.
+ assert ((ArgFlags.getByValSize() > 8) &&
+ "ByValSize must be bigger than 8 bytes");
+ unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+ }
+
+ if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) {
+ LocVT = MVT::i32;
+ ValVT = MVT::i32;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExt;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExt;
+ else
+ LocInfo = CCValAssign::AExt;
+ }
+
+ if (LocVT == MVT::i32) {
+ if (!CC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+
+ if (LocVT == MVT::i64) {
+ if (!CC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+
+ return true; // CC didn't match.
+}
+
+
+static bool CC_Hexagon32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ static const unsigned RegList[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+ Hexagon::R5
+ };
+ if (unsigned Reg = State.AllocateReg(RegList, 6)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+
+ unsigned Offset = State.AllocateStack(4, 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+}
+
+static bool CC_Hexagon64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ if (unsigned Reg = State.AllocateReg(Hexagon::D0)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+
+ static const unsigned RegList1[] = {
+ Hexagon::D1, Hexagon::D2
+ };
+ static const unsigned RegList2[] = {
+ Hexagon::R1, Hexagon::R3
+ };
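+  // D0 was not available: try D1 or D2, also marking the corresponding
+  // shadow registers R1/R3 as allocated.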
+ if (unsigned Reg = State.AllocateReg(RegList1, RegList2, 2)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+
+ unsigned Offset = State.AllocateStack(8, 8, Hexagon::D2);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+}
+
+static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+
+ if (LocVT == MVT::i1 ||
+ LocVT == MVT::i8 ||
+ LocVT == MVT::i16) {
+ LocVT = MVT::i32;
+ ValVT = MVT::i32;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExt;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExt;
+ else
+ LocInfo = CCValAssign::AExt;
+ }
+
+ if (LocVT == MVT::i32) {
+ if (!RetCC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+
+ if (LocVT == MVT::i64) {
+ if (!RetCC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+
+ return true; // CC didn't match.
+}
+
+static bool RetCC_Hexagon32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ if (LocVT == MVT::i32) {
+ if (unsigned Reg = State.AllocateReg(Hexagon::R0)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
+ unsigned Offset = State.AllocateStack(4, 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+}
+
+static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ if (LocVT == MVT::i64) {
+ if (unsigned Reg = State.AllocateReg(Hexagon::D0)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
+ unsigned Offset = State.AllocateStack(8, 8);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+}
+
+SDValue
+HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
+const {
+ return SDValue();
+}
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+/// by "Src" to address "Dst" of size "Size". Alignment information is
+/// specified by the specific parameter attribute. The copy will be passed as
+/// a byval function parameter. Sometimes what we are copying is the end of a
+/// larger object, the part that does not fit in registers.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
+ DebugLoc dl) {
+
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+ return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
+ /*isVolatile=*/false, /*AlwaysInline=*/false,
+ MachinePointerInfo(), MachinePointerInfo());
+}
+
+
+// LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
+// passed by value, the function prototype is modified to return void and
+// the value is stored in memory pointed to by a pointer passed by the caller.
+SDValue
+HexagonTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ // CCValAssign - represent the assignment of the return value to locations.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analyze return values of ISD::RET
+ CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);
+
+ SDValue StackPtr = DAG.getRegister(TM.getRegisterInfo()->getStackRegister(),
+ MVT::i32);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ SDValue Ret = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag);
+
+ // Guarantee that all emitted copies are stuck together with flags.
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode())
+ return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+
+ return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+
+
+
+/// LowerCallResult - Lower the result values of an ISD::CALL into the
+/// appropriate copies out of appropriate physical registers. This assumes that
+/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
+/// being lowered. Returns a SDNode with the same number of values as the
+/// ISD::CALL.
+SDValue
+HexagonTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const
+ SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDValue Callee) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl,
+ RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
+/// LowerCall - Functions arguments are copied from virtual regs to
+/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
+SDValue
+HexagonTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // Check for varargs.
+ NumNamedVarArgParams = -1;
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Callee))
+ {
+ const Function* CalleeFn = NULL;
+ Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, MVT::i32);
+ if ((CalleeFn = dyn_cast<Function>(GA->getGlobal())))
+ {
+ // If a function has zero args and is a vararg function, that's
+ // disallowed so it must be an undeclared function. Do not assume
+ // varargs if the callee is undefined.
+ if (CalleeFn->isVarArg() &&
+ CalleeFn->getFunctionType()->getNumParams() != 0) {
+ NumNamedVarArgParams = CalleeFn->getFunctionType()->getNumParams();
+ }
+ }
+ }
+
+ if (NumNamedVarArgParams > 0)
+ CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_VarArg);
+ else
+ CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);
+
+
+ if(isTailCall) {
+ bool StructAttrFlag =
+ DAG.getMachineFunction().getFunction()->hasStructRetAttr();
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ isVarArg, IsStructRet,
+ StructAttrFlag,
+ Outs, OutVals, Ins, DAG);
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i){
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isMemLoc()) {
+ isTailCall = false;
+ break;
+ }
+ }
+ if (isTailCall) {
+ DEBUG(dbgs () << "Eligible for Tail Call\n");
+ } else {
+ DEBUG(dbgs () <<
+ "Argument must be passed on stack. Not eligible for Tail Call\n");
+ }
+ }
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+ SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ SDValue StackPtr =
+ DAG.getCopyFromReg(Chain, dl, TM.getRegisterInfo()->getStackRegister(),
+ getPointerTy());
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default:
+ // Loc info must be one of Full, SExt, ZExt, or AExt.
+ assert(0 && "Unknown loc info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isMemLoc()) {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getConstant(LocMemOffset, StackPtr.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+
+ if (Flags.isByVal()) {
+ // The argument is a struct passed by value. According to LLVM, "Arg"
+        // is a pointer.
+ MemOpChains.push_back(CreateCopyOfByValArgument(Arg, PtrOff, Chain,
+ Flags, DAG, dl));
+ } else {
+        // The argument is not passed by value. "Arg" is a builtin type. It is
+ // not a pointer.
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),false, false,
+ 0));
+ }
+ continue;
+ }
+
+ // Arguments that can be passed on register must be kept at RegsToPass
+ // vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ }
+ }
+
+ // Transform all store nodes into one single node because all store
+ // nodes are independent of each other.
+ if (!MemOpChains.empty()) {
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0],
+ MemOpChains.size());
+ }
+
+ if (!isTailCall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
+ getPointerTy(), true));
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+  // The InFlag is necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+ if (!isTailCall) {
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ }
+
+ // For tail calls lower the arguments to the 'real' stack slot.
+ if (isTailCall) {
+ // Force all the incoming stack arguments to be loaded from the stack
+ // before any new outgoing arguments are stored to the stack, because the
+ // outgoing stack slots may alias the incoming argument stack slots, and
+ // the alias isn't otherwise explicit. This is slightly more conservative
+ // than necessary, because it means that each store effectively depends
+ // on every argument instead of just those arguments it would clobber.
+ //
+    // Do not flag preceding copytoreg stuff together with the following stuff.
+ InFlag = SDValue();
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+    InFlag = SDValue();
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (flag_aligned_memcpy) {
+ const char *MemcpyName =
+ "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes";
+ Callee =
+ DAG.getTargetExternalSymbol(MemcpyName, getPointerTy());
+ flag_aligned_memcpy = false;
+ } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
+ } else if (ExternalSymbolSDNode *S =
+ dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+ }
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+ }
+
+ if (InFlag.getNode()) {
+ Ops.push_back(InFlag);
+ }
+
+ if (isTailCall)
+ return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+
+ Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+ InVals, OutVals, Callee);
+}
+
+static bool getIndexedAddressParts(SDNode *Ptr, EVT VT,
+ bool isSEXTLoad, SDValue &Base,
+ SDValue &Offset, bool &isInc,
+ SelectionDAG &DAG) {
+ if (Ptr->getOpcode() != ISD::ADD)
+ return false;
+
+ if (VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) {
+ isInc = (Ptr->getOpcode() == ISD::ADD);
+ Base = Ptr->getOperand(0);
+ Offset = Ptr->getOperand(1);
+ // Ensure that Offset is a constant.
+ return (isa<ConstantSDNode>(Offset));
+ }
+
+ return false;
+}
+
+// TODO: Put this function along with the other isS* functions in
+// HexagonISelDAGToDAG.cpp into a common file. Or better still, use the
+// functions defined in HexagonImmediates.td.
+static bool Is_PostInc_S4_Offset(SDNode * S, int ShiftAmount) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+  // immS4 predicate - True if the immediate fits in a 4-bit sign-extended
+  // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ int64_t m = 0;
+ if (ShiftAmount > 0) {
+ m = v % ShiftAmount;
+ v = v >> ShiftAmount;
+ }
+ return (v <= 7) && (v >= -8) && (m == 0);
+}
+
+/// getPostIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if this node can be
+/// combined with a load / store to form a post-indexed load / store.
+bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const
+{
+ EVT VT;
+ SDValue Ptr;
+ bool isSEXTLoad = false;
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ VT = LD->getMemoryVT();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ VT = ST->getMemoryVT();
+ if (ST->getValue().getValueType() == MVT::i64 && ST->isTruncatingStore()) {
+ return false;
+ }
+ } else {
+ return false;
+ }
+
+ bool isInc;
+ bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+ // ShiftAmount = number of left-shifted bits in the Hexagon instruction.
+ int ShiftAmount = VT.getSizeInBits() / 16;
+ if (isLegal && Is_PostInc_S4_Offset(Offset.getNode(), ShiftAmount)) {
+ AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
+ return true;
+ }
+
+ return false;
+}
+
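+// Scan an INLINEASM node for an early-clobber definition of the return
+// address register (LR); if one is found, record on the function info that
+// LR is clobbered.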
+SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDNode *Node = Op.getNode();
+ MachineFunction &MF = DAG.getMachineFunction();
+ HexagonMachineFunctionInfo *FuncInfo =
+ MF.getInfo<HexagonMachineFunctionInfo>();
+ switch (Node->getOpcode()) {
+ case ISD::INLINEASM: {
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the flag operand.
+
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ if (FuncInfo->hasClobberLR())
+ break;
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ ++i; // Skip the ID value.
+
+ switch (InlineAsm::getKind(Flags)) {
+ default: llvm_unreachable("Bad flags!");
+ case InlineAsm::Kind_RegDef:
+ case InlineAsm::Kind_RegUse:
+ case InlineAsm::Kind_Imm:
+ case InlineAsm::Kind_Clobber:
+ case InlineAsm::Kind_Mem: {
+ for (; NumVals; --NumVals, ++i) {}
+ break;
+ }
+ case InlineAsm::Kind_RegDefEarlyClobber: {
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg =
+ cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+
+ // Check it to be lr
+ if (Reg == TM.getRegisterInfo()->getRARegister()) {
+ FuncInfo->setHasClobberLR(true);
+ break;
+ }
+ }
+ break;
+ }
+ }
+ }
+ }
+ } // Node->getOpcode
+ return Op;
+}
+
+
+//
+// Taken from the XCore backend.
+//
+SDValue HexagonTargetLowering::
+LowerBR_JT(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Chain = Op.getOperand(0);
+ SDValue Table = Op.getOperand(1);
+ SDValue Index = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
+ unsigned JTI = JT->getIndex();
+ MachineFunction &MF = DAG.getMachineFunction();
+ const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ SDValue TargetJT = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
+
+ // Mark all jump table targets as address taken.
+ const std::vector<MachineJumpTableEntry> &JTE = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JTE[JTI].MBBs;
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ MBB->setHasAddressTaken();
+ // This line is needed to set the hasAddressTaken flag on the BasicBlock
+ // object.
+ BlockAddress::get(const_cast<BasicBlock *>(MBB->getBasicBlock()));
+ }
+
+ SDValue JumpTableBase = DAG.getNode(HexagonISD::WrapperJT, dl,
+ getPointerTy(), TargetJT);
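+ // Each jump table entry is a 4-byte address, so scale the index by 4
+ // (shift left by 2) before adding it to the table base.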
+ SDValue ShiftIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index,
+ DAG.getConstant(2, MVT::i32));
+ SDValue JTAddress = DAG.getNode(ISD::ADD, dl, MVT::i32, JumpTableBase,
+ ShiftIndex);
+ SDValue LoadTarget = DAG.getLoad(MVT::i32, dl, Chain, JTAddress,
+ MachinePointerInfo(), false, false, false,
+ 0);
+ return DAG.getNode(HexagonISD::BR_JT, dl, MVT::Other, Chain, LoadTarget);
+}
+
+
+SDValue
+HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ unsigned SPReg = getStackPointerRegisterToSaveRestore();
+
+ // Get a reference to the stack pointer.
+ SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, SPReg, MVT::i32);
+
+ // Subtract the dynamic size from the actual stack size to
+ // obtain the new stack size.
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, StackPointer, Size);
+
+ //
+ // For Hexagon, the outgoing memory arguments area should be on top of the
+ // alloca area on the stack i.e., the outgoing memory arguments should be
+ // at a lower address than the alloca area. Move the alloca area down the
+ // stack by adding back the space reserved for outgoing arguments to SP
+ // here.
+ //
+ // We do not know what the size of the outgoing args is at this point.
+ // So, we add a pseudo instruction ADJDYNALLOC that will adjust the
+ // stack pointer. We patch this instruction with the correct, known
+ // offset in emitPrologue().
+ //
+ // Use a placeholder immediate (zero) for now. This will be patched up
+ // by emitPrologue().
+ SDValue ArgAdjust = DAG.getNode(HexagonISD::ADJDYNALLOC, dl,
+ MVT::i32,
+ Sub,
+ DAG.getConstant(0, MVT::i32));
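+ // The ADJDYNALLOC result is the address handed back to the alloca's users.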
+
+ // The Sub result contains the new stack start address, so it
+ // must be placed in the stack pointer register.
+ SDValue CopyChain = DAG.getCopyToReg(Chain, dl,
+ TM.getRegisterInfo()->getStackRegister(),
+ Sub);
+
+ SDValue Ops[2] = { ArgAdjust, CopyChain };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue
+HexagonTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const
+ SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ HexagonMachineFunctionInfo *FuncInfo =
+ MF.getInfo<HexagonMachineFunctionInfo>();
+
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
+
+ // For LLVM, when a struct larger than 8 bytes is returned by value, the
+ // first argument is a pointer to the location on the caller's stack where
+ // the return value will be stored. For Hexagon, that stack location is
+ // passed only when the struct is larger than 8 bytes. Otherwise no address
+ // is passed to the callee, and the callee returns the result directly
+ // through R0/R1.
+
+ SmallVector<SDValue, 4> MemOps;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ unsigned ObjSize;
+ unsigned StackLocation;
+ int FI;
+
+ if ( (VA.isRegLoc() && !Flags.isByVal())
+ || (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() > 8)) {
+ // Arguments passed in registers:
+ // 1. int, long long, and pointer arguments that are allocated to registers.
+ // 2. Large structs that get a register to hold their address.
+ EVT RegVT = VA.getLocVT();
+ if (RegVT == MVT::i8 || RegVT == MVT::i16 || RegVT == MVT::i32) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(Hexagon::IntRegsRegisterClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else if (RegVT == MVT::i64) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(Hexagon::DoubleRegsRegisterClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else {
+ assert (0);
+ }
+ } else if (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() <= 8) {
+ assert (0 && "ByValSize must be bigger than 8 bytes");
+ } else {
+ // Sanity check.
+ assert(VA.isMemLoc());
+
+ if (Flags.isByVal()) {
+ // If it's a byval parameter, then we need to compute the
+ // "real" size, not the size of the pointer.
+ ObjSize = Flags.getByValSize();
+ } else {
+ ObjSize = VA.getLocVT().getStoreSizeInBits() >> 3;
+ }
+
+ StackLocation = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
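+ // The incoming argument lives past the saved LR/FP pair, hence the
+ // HEXAGON_LRFP_SIZE adjustment to its stack offset.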
+ // Create the frame index object for this incoming parameter...
+ FI = MFI->CreateFixedObject(ObjSize, StackLocation, true);
+
+ // Create the SelectionDAG nodes corresponding to a load
+ // from this parameter.
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+
+ if (Flags.isByVal()) {
+ // If it's a pass-by-value aggregate, then do not dereference the stack
+ // location. Instead, we should generate a reference to the stack
+ // location.
+ InVals.push_back(FIN);
+ } else {
+ InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo(), false, false,
+ false, 0));
+ }
+ }
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOps[0],
+ MemOps.size());
+
+ if (isVarArg) {
+ // This will point to the next argument passed via stack.
+ int FrameIndex = MFI->CreateFixedObject(Hexagon_PointerSize,
+ HEXAGON_LRFP_SIZE +
+ CCInfo.getNextStackOffset(),
+ true);
+ FuncInfo->setVarArgsFrameIndex(FrameIndex);
+ }
+
+ return Chain;
+}
+
+SDValue
+HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+ // VASTART stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ MachineFunction &MF = DAG.getMachineFunction();
+ HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>();
+ SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), Op.getDebugLoc(), Addr,
+ Op.getOperand(1), MachinePointerInfo(SV), false,
+ false, 0);
+}
+
+SDValue
+HexagonTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDNode* OpNode = Op.getNode();
+
+ SDValue Cond = DAG.getNode(ISD::SETCC, Op.getDebugLoc(), MVT::i1,
+ Op.getOperand(2), Op.getOperand(3),
+ Op.getOperand(4));
+ return DAG.getNode(ISD::SELECT, Op.getDebugLoc(), OpNode->getValueType(0),
+ Cond, Op.getOperand(0),
+ Op.getOperand(1));
+}
+
+SDValue
+HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ if (Depth) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset = DAG.getConstant(4, MVT::i32);
+ return DAG.getLoad(VT, dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
+ MachinePointerInfo(), false, false, false, 0);
+ }
+
+ // Return LR, which contains the return address. Mark it an implicit live-in.
+ unsigned Reg = MF.addLiveIn(TRI->getRARegister(), getRegClassFor(MVT::i32));
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
+}
+
+SDValue
+HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
+ const HexagonRegisterInfo *TRI = TM.getRegisterInfo();
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ TRI->getFrameRegister(), VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ return FrameAddr;
+}
+
+
+SDValue HexagonTargetLowering::LowerMEMBARRIER(SDValue Op,
+ SelectionDAG& DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
+}
+
+
+SDValue HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op,
+ SelectionDAG& DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
+}
+
+
+SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Result;
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+ DebugLoc dl = Op.getDebugLoc();
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
+
+ HexagonTargetObjectFile &TLOF =
+ (HexagonTargetObjectFile&)getObjFileLowering();
+ if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
+ return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), Result);
+ }
+
+ return DAG.getNode(HexagonISD::CONST32, dl, getPointerTy(), Result);
+}
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
+HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
+ &targetmachine)
+ : TargetLowering(targetmachine, new HexagonTargetObjectFile()),
+ TM(targetmachine) {
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, Hexagon::IntRegsRegisterClass);
+ addRegisterClass(MVT::i64, Hexagon::DoubleRegsRegisterClass);
+
+ addRegisterClass(MVT::i1, Hexagon::PredRegsRegisterClass);
+
+ computeRegisterProperties();
+
+ // Align loop entry
+ setPrefLoopAlignment(4);
+
+ // Limits for inline expansion of memcpy/memmove
+ maxStoresPerMemcpy = 6;
+ maxStoresPerMemmove = 6;
+
+ //
+ // Library calls for unsupported operations
+ //
+ setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
+
+ setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
+ setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
+ setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
+ setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
+ setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
+ setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
+
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi");
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi");
+ setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
+
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi");
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi");
+ setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
+
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
+ setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
+ setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
+ setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
+ setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
+
+ setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
+
+ setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setLibcallName(RTLIB::SREM_I32, "__hexagon_umodsi3");
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
+ setOperationAction(ISD::SDIV, MVT::i64, Expand);
+ setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+
+ setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
+ setOperationAction(ISD::UDIV, MVT::i64, Expand);
+
+ setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+ setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
+ setOperationAction(ISD::FDIV, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
+ setOperationAction(ISD::FDIV, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2");
+ setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
+ setOperationAction(ISD::FADD, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
+ setOperationAction(ISD::FADD, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
+ setOperationAction(ISD::FADD, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2");
+ setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
+ setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
+ setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2");
+ setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2");
+ setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
+ setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
+ setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
+ setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
+ setCondCodeAction(ISD::SETOLT, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
+ setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
+ setOperationAction(ISD::FMUL, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
+ setOperationAction(ISD::FMUL, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
+ setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
+
+
+ setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
+ setOperationAction(ISD::FSUB, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
+ setOperationAction(ISD::FSUB, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2");
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2");
+ setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2");
+ setCondCodeAction(ISD::SETO, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2");
+ setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2");
+ setCondCodeAction(ISD::SETO, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2");
+ setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
+
+ setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i64, Legal);
+
+ setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal);
+ setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal);
+ setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
+ setIndexedStoreAction(ISD::POST_INC, MVT::i64, Legal);
+
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+
+ // Turn FP extload into load/fextend.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ // Expand the i1 sign-extending load.
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand);
+ // Turn FP truncstore into trunc + store.
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // Custom legalize GlobalAddress nodes into CONST32.
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i8, Custom);
+ // Truncate action?
+ setOperationAction(ISD::TRUNCATE, MVT::i64, Expand);
+
+ // Hexagon doesn't have sext_inreg; replace it with shl/sra.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+
+ // Hexagon has no REM or DIVREM operations.
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+
+ setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+
+ // Expand fp<->uint.
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+
+ // Hexagon has no floating-point select; expand it to SELECT_CC.
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+
+ // Lower SELECT_CC to SETCC and SELECT.
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+ // This is a workaround documented in DAGCombiner.cpp:2892; we don't
+ // support SELECT_CC on every type.
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ if (EmitJumpTables) {
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+ } else {
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ }
+
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL , MVT::i32, Expand);
+ setOperationAction(ISD::ROTR , MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f64, Expand);
+ setOperationAction(ISD::FPOW , MVT::f32, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+
+ setOperationAction(ISD::EH_RETURN, MVT::Other, Expand);
+
+ if (TM.getSubtargetImpl()->isSubtargetV2()) {
+ setExceptionPointerRegister(Hexagon::R20);
+ setExceptionSelectorRegister(Hexagon::R21);
+ } else {
+ setExceptionPointerRegister(Hexagon::R0);
+ setExceptionSelectorRegister(Hexagon::R1);
+ }
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
+
+
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
+ setOperationAction(ISD::INLINEASM , MVT::Other, Custom);
+
+ setMinFunctionAlignment(2);
+
+ // Needed for DYNAMIC_STACKALLOC expansion.
+ unsigned StackRegister = TM.getRegisterInfo()->getStackRegister();
+ setStackPointerRegisterToSaveRestore(StackRegister);
+}
+
+
+const char*
+HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case HexagonISD::CONST32: return "HexagonISD::CONST32";
+ case HexagonISD::ADJDYNALLOC: return "HexagonISD::ADJDYNALLOC";
+ case HexagonISD::CMPICC: return "HexagonISD::CMPICC";
+ case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC";
+ case HexagonISD::BRICC: return "HexagonISD::BRICC";
+ case HexagonISD::BRFCC: return "HexagonISD::BRFCC";
+ case HexagonISD::SELECT_ICC: return "HexagonISD::SELECT_ICC";
+ case HexagonISD::SELECT_FCC: return "HexagonISD::SELECT_FCC";
+ case HexagonISD::Hi: return "HexagonISD::Hi";
+ case HexagonISD::Lo: return "HexagonISD::Lo";
+ case HexagonISD::FTOI: return "HexagonISD::FTOI";
+ case HexagonISD::ITOF: return "HexagonISD::ITOF";
+ case HexagonISD::CALL: return "HexagonISD::CALL";
+ case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
+ case HexagonISD::BR_JT: return "HexagonISD::BR_JT";
+ case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
+ }
+}
+
+bool
+HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ EVT MTy1 = EVT::getEVT(Ty1);
+ EVT MTy2 = EVT::getEVT(Ty2);
+ if (!MTy1.isSimple() || !MTy2.isSimple()) {
+ return false;
+ }
+ return ((MTy1.getSimpleVT() == MVT::i64) && (MTy2.getSimpleVT() == MVT::i32));
+}
+
+bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
+ if (!VT1.isSimple() || !VT2.isSimple()) {
+ return false;
+ }
+ return ((VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32));
+}
+
+SDValue
+HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Should not custom lower this!");
+ // Frame & Return address.
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::GlobalTLSAddress:
+ assert(0 && "TLS not implemented for Hexagon.");
+ case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG);
+ case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
+ case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INLINEASM: return LowerINLINEASM(Op, DAG);
+
+ }
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Hexagon Scheduler Hooks
+//===----------------------------------------------------------------------===//
+MachineBasicBlock *
+HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB)
+const {
+ switch (MI->getOpcode()) {
+ case Hexagon::ADJDYNALLOC: {
+ MachineFunction *MF = BB->getParent();
+ HexagonMachineFunctionInfo *FuncInfo =
+ MF->getInfo<HexagonMachineFunctionInfo>();
+ FuncInfo->addAllocaAdjustInst(MI);
+ return BB;
+ }
+ default:
+ assert(false && "Unexpected instr type to insert");
+ } // switch
+ return NULL;
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+std::pair<unsigned, const TargetRegisterClass*>
+HexagonTargetLowering::getRegForInlineAsmConstraint(const
+ std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r': // R0-R31
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ assert(0 && "getRegForInlineAsmConstraint Unhandled data type");
+ case MVT::i32:
+ case MVT::i16:
+ case MVT::i8:
+ return std::make_pair(0U, Hexagon::IntRegsRegisterClass);
+ case MVT::i64:
+ return std::make_pair(0U, Hexagon::DoubleRegsRegisterClass);
+ }
+ default:
+ assert(0 && "Unknown asm register class");
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+/// isLegalAddressingMode - Return true if the addressing mode represented by
+/// AM is legal for this target, for a load/store of the specified type.
+bool HexagonTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+ // Allows a sign-extended 11-bit immediate field.
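+ // For example, a base offset of 1000 passes this range check, while an
+ // offset of 9000 is rejected.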
+ if (AM.BaseOffs <= -(1LL << 13) || AM.BaseOffs >= (1LL << 13)-1) {
+ return false;
+ }
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV) {
+ return false;
+ }
+
+ int Scale = AM.Scale;
+ if (Scale < 0) Scale = -Scale;
+ switch (Scale) {
+ case 0: // No scale reg, "r+i", "r", or just "i".
+ break;
+ default: // No scaled addressing mode.
+ return false;
+ }
+ return true;
+}
+
+/// isLegalICmpImmediate - Return true if the specified immediate is a legal
+/// icmp immediate, i.e. the target has icmp instructions which can compare
+/// a register against the immediate without having to materialize the
+/// immediate into a register.
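+/// For Hexagon this accepts values in [-512, 511]; for example 511 is legal,
+/// while 512 would first have to be materialized in a register.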
+bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ return Imm >= -512 && Imm <= 511;
+}
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
+ SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ const Function *CallerF = DAG.getMachineFunction().getFunction();
+ CallingConv::ID CallerCC = CallerF->getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ // ***************************************************************************
+ // Look for obvious safe cases to perform tail call optimization that do not
+ // require ABI changes.
+ // ***************************************************************************
+
+ // If this is a tail call via a function pointer, then don't do it!
+ if (!(dyn_cast<GlobalAddressSDNode>(Callee))
+ && !(dyn_cast<ExternalSymbolSDNode>(Callee))) {
+ return false;
+ }
+
+ // Do not optimize if the calling conventions do not match.
+ if (!CCMatch)
+ return false;
+
+ // Do not tail call optimize vararg calls.
+ if (isVarArg)
+ return false;
+
+ // Also avoid tail call optimization if either caller or callee uses struct
+ // return semantics.
+ if (isCalleeStructRet || isCallerStructRet)
+ return false;
+
+ // In addition to the cases above, we also disable tail call optimization if
+ // the calling convention requires that at least one outgoing argument be
+ // passed on the stack. We cannot check that here because at this point that
+ // information is not available.
+ return true;
+}
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
new file mode 100644
index 0000000..b327615
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -0,0 +1,162 @@
+//==-- HexagonISelLowering.h - Hexagon DAG Lowering Interface ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Hexagon uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef Hexagon_ISELLOWERING_H
+#define Hexagon_ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "Hexagon.h"
+
+namespace llvm {
+ namespace HexagonISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ CONST32,
+ CONST32_GP, // For marking data present in GP.
+ SETCC,
+ ADJDYNALLOC,
+ ARGEXTEND,
+
+ CMPICC, // Compare two GPR operands, set icc.
+ CMPFCC, // Compare two FP operands, set fcc.
+ BRICC, // Branch to dest on icc condition
+ BRFCC, // Branch to dest on fcc condition
+ SELECT_ICC, // Select between two values using the current ICC flags.
+ SELECT_FCC, // Select between two values using the current FCC flags.
+
+ Hi, Lo, // Hi/Lo operations, typically on a global address.
+
+ FTOI, // FP to Int within a FP register.
+ ITOF, // Int to FP within a FP register.
+
+ CALL, // A call instruction.
+ RET_FLAG, // Return with a flag operand.
+ BR_JT, // Jump table.
+ BARRIER, // Memory barrier.
+ WrapperJT,
+ TC_RETURN
+ };
+ }
+
+ class HexagonTargetLowering : public TargetLowering {
+ int VarArgsFrameOffset; // Frame offset to start of varargs area.
+
+ bool CanReturnSmallStruct(const Function* CalleeFn,
+ unsigned& RetSize) const;
+
+ public:
+ HexagonTargetMachine &TM;
+ explicit HexagonTargetLowering(HexagonTargetMachine &targetmachine);
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets which want to do tail call
+ /// optimization should implement this function.
+ bool
+ IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const
+ SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+
+ virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
+ virtual bool isTruncateFree(EVT VT1, EVT VT2) const;
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDValue Callee) const;
+
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const;
+ SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual MachineBasicBlock
+ *EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ virtual EVT getSetCCResultType(EVT VT) const {
+ return MVT::i1;
+ }
+
+ virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base, SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ // Intrinsics
+ virtual SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const;
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ /// The type may be VoidTy, in which case only return true if the addressing
+ /// mode is legal for a load/store of any legal type.
+ /// TODO: Handle pre/postinc as well.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
+
+ /// isLegalICmpImmediate - Return true if the specified immediate is a legal
+ /// icmp immediate, i.e. the target has icmp instructions which can
+ /// compare a register against the immediate without having to materialize
+ /// the immediate into a register.
+ virtual bool isLegalICmpImmediate(int64_t Imm) const;
+ };
+} // end namespace llvm
+
+#endif // Hexagon_ISELLOWERING_H
diff --git a/lib/Target/Hexagon/HexagonImmediates.td b/lib/Target/Hexagon/HexagonImmediates.td
new file mode 100644
index 0000000..1e3fcb8
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonImmediates.td
@@ -0,0 +1,491 @@
+//=- HexagonImmediates.td - Hexagon immediate processing --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// From IA64's InstrInfo file
+def s32Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s16Imm : Operand<i32> {
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s12Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s11Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s11_0Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s11_1Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s11_2Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s11_3Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s10Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s8Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s9Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s8Imm64 : Operand<i64> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s6Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s4Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s4_0Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s4_1Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s4_2Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s4_3Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u64Imm : Operand<i64> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u32Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u16Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u16_0Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u16_1Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u16_2Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u11_3Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u10Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u9Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u8Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u7Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u6Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u6_0Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u6_1Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u6_2Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u6_3Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u5Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u4Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u3Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u2Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def n8Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def m6Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+//
+// Immediate predicates
+//
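+// Most predicates below test the node's sign-extended value; the shifted
+// variants additionally require the value to be a multiple of the
+// corresponding power of two (for example, s4_2ImmPred accepts only
+// multiples of 4 that fit in a 4-bit field after shifting).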
+def s32ImmPred : PatLeaf<(i32 imm), [{
+ // s32ImmPred predicate - True if the immediate fits in a 32-bit sign
+ // extended field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<32>(v);
+}]>;
+
+def s32_24ImmPred : PatLeaf<(i32 imm), [{
+ // s32_24ImmPred predicate - True if the immediate fits in a 32-bit sign
+ // extended field that is a multiple of 0x1000000.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<32,24>(v);
+}]>;
+
+def s32_16s8ImmPred : PatLeaf<(i32 imm), [{
+ // s32_16s8ImmPred predicate - True if the immediate fits in a 32-bit sign
+ // extended field that is a multiple of 0x10000.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<24,16>(v);
+}]>;
+
+def s16ImmPred : PatLeaf<(i32 imm), [{
+ // immS16 predicate - True if the immediate fits in a 16-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<16>(v);
+}]>;
+
+
+def s13ImmPred : PatLeaf<(i32 imm), [{
+ // immS13 predicate - True if the immediate fits in a 13-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<13>(v);
+}]>;
+
+
+def s12ImmPred : PatLeaf<(i32 imm), [{
+ // s12ImmPred predicate - True if the immediate fits in a 12-bit sign
+ // extended field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<12>(v);
+}]>;
+
+def s11_0ImmPred : PatLeaf<(i32 imm), [{
+ // s11_0ImmPred predicate - True if the immediate fits in an 11-bit sign
+ // extended field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<11>(v);
+}]>;
+
+
+def s11_1ImmPred : PatLeaf<(i32 imm), [{
+ // s11_1ImmPred predicate - True if the immediate fits in an 11-bit sign
+ // extended field that is a multiple of 2.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,1>(v);
+}]>;
+
+
+def s11_2ImmPred : PatLeaf<(i32 imm), [{
+ // s11_2ImmPred predicate - True if the immediate fits in an 11-bit sign
+ // extended field that is a multiple of 4.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,2>(v);
+}]>;
+
+
+def s11_3ImmPred : PatLeaf<(i32 imm), [{
+ // s11_3ImmPred predicate - True if the immediate fits in an 11-bit sign
+ // extended field that is a multiple of 8.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,3>(v);
+}]>;
+
+
+def s10ImmPred : PatLeaf<(i32 imm), [{
+ // s10ImmPred predicate - True if the immediate fits in a 10-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<10>(v);
+}]>;
+
+
+def s9ImmPred : PatLeaf<(i32 imm), [{
+ // s9ImmPred predicate - True if the immediate fits in a 9-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<9>(v);
+}]>;
+
+
+def s8ImmPred : PatLeaf<(i32 imm), [{
+ // s8ImmPred predicate - True if the immediate fits in an 8-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<8>(v);
+}]>;
+
+
+def s8Imm64Pred : PatLeaf<(i64 imm), [{
+ // s8Imm64Pred predicate - True if the immediate fits in an 8-bit sign
+ // extended field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<8>(v);
+}]>;
+
+
+def s6ImmPred : PatLeaf<(i32 imm), [{
+ // s6ImmPred predicate - True if the immediate fits in a 6-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<6>(v);
+}]>;
+
+
+def s4_0ImmPred : PatLeaf<(i32 imm), [{
+ // s4_0ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<4>(v);
+}]>;
+
+
+def s4_1ImmPred : PatLeaf<(i32 imm), [{
+ // s4_1ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field that is a multiple of 2.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<4,1>(v);
+}]>;
+
+
+def s4_2ImmPred : PatLeaf<(i32 imm), [{
+ // s4_2ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field that is a multiple of 4.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<4,2>(v);
+}]>;
+
+
+def s4_3ImmPred : PatLeaf<(i32 imm), [{
+ // s4_3ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field that is a multiple of 8.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<4,3>(v);
+}]>;
+
+
+def u64ImmPred : PatLeaf<(i64 imm), [{
+ // u64ImmPred predicate - Currently accepts any 64-bit immediate; no range
+ // check is performed.
+ // Adding "N ||" to suppress the gcc unused-variable warning.
+ return (N || true);
+}]>;
+
+def u32ImmPred : PatLeaf<(i32 imm), [{
+ // u32ImmPred predicate - True if the immediate fits in a 32-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<32>(v);
+}]>;
+
+def u16ImmPred : PatLeaf<(i32 imm), [{
+ // u16ImmPred predicate - True if the immediate fits in a 16-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<16>(v);
+}]>;
+
+def u16_s8ImmPred : PatLeaf<(i32 imm), [{
+ // u16_s8ImmPred predicate - True if the immediate is a multiple of 256
+ // whose shifted-down value fits in a 16-bit unsigned field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<16,8>(v);
+}]>;
+
+def u9ImmPred : PatLeaf<(i32 imm), [{
+ // u9ImmPred predicate - True if the immediate fits in a 9-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<9>(v);
+}]>;
+
+
+def u8ImmPred : PatLeaf<(i32 imm), [{
+ // u8ImmPred predicate - True if the immediate fits in an 8-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<8>(v);
+}]>;
+
+def u7ImmPred : PatLeaf<(i32 imm), [{
+ // u7ImmPred predicate - True if the immediate fits in a 7-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<7>(v);
+}]>;
+
+
+def u6ImmPred : PatLeaf<(i32 imm), [{
+ // u6ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<6>(v);
+}]>;
+
+def u6_0ImmPred : PatLeaf<(i32 imm), [{
+ // u6_0ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // field. Same as u6ImmPred.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<6>(v);
+}]>;
+
+def u6_1ImmPred : PatLeaf<(i32 imm), [{
+ // u6_1ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // field that is 1 bit aligned - multiple of 2.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,1>(v);
+}]>;
+
+def u6_2ImmPred : PatLeaf<(i32 imm), [{
+ // u6_2ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // field that is 2 bits aligned - multiple of 4.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,2>(v);
+}]>;
+
+def u6_3ImmPred : PatLeaf<(i32 imm), [{
+ // u6_3ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // field that is 3 bits aligned - multiple of 8.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,3>(v);
+}]>;
+
+def u5ImmPred : PatLeaf<(i32 imm), [{
+ // u5ImmPred predicate - True if the immediate fits in a 5-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<5>(v);
+}]>;
+
+
+def u3ImmPred : PatLeaf<(i32 imm), [{
+ // u3ImmPred predicate - True if the immediate fits in a 3-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<3>(v);
+}]>;
+
+
+def u2ImmPred : PatLeaf<(i32 imm), [{
+ // u2ImmPred predicate - True if the immediate fits in a 2-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<2>(v);
+}]>;
+
+
+def u1ImmPred : PatLeaf<(i1 imm), [{
+ // u1ImmPred predicate - True if the immediate fits in a 1-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<1>(v);
+}]>;
+
+def m6ImmPred : PatLeaf<(i32 imm), [{
+ // m6ImmPred predicate - Intended for negative 6-bit values; currently only
+ // checks that the immediate fits in a 6-bit signed field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<6>(v);
+}]>;
+
+//InN means negative integers in [-(2^N - 1), 0]
+def n8ImmPred : PatLeaf<(i32 imm), [{
+ // n8ImmPred predicate - True if the immediate is a negative value in the
+ // range [-255, 0].
+ int64_t v = (int64_t)N->getSExtValue();
+ return (-255 <= v && v <= 0);
+}]>;
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
new file mode 100644
index 0000000..7e92776
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -0,0 +1,242 @@
+//==- HexagonInstrFormats.td - Hexagon Instruction Formats --*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr,
+ InstrItinClass itin> : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "Hexagon";
+
+/* Commented out for Hexagon
+ bits<2> op;
+ let Inst{31-30} = op; */ // Top two bits are the 'op' field
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Constraints = cstr;
+ let Itinerary = itin;
+}
+
+//----------------------------------------------------------------------------//
+// Instruction Class Definitions +
+//----------------------------------------------------------------------------//
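+// Each class below fixes the itinerary (instruction type) passed to
+// InstHexagon and declares the register and immediate field widths shared by
+// instructions of that format.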
+
+// LD Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", LD> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<13> imm13;
+}
+
+// LD Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, LD> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<13> imm13;
+}
+
+// ST Instruction Class in V2/V3 can take SLOT0 only.
+// ST Instruction Class in V4 can take SLOT0 & SLOT1.
+// Definition of the instruction class CHANGED from V2/V3 to V4.
+class STInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", ST> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<13> imm13;
+}
+
+// ST Instruction Class in V2/V3 can take SLOT0 only.
+// ST Instruction Class in V4 can take SLOT0 & SLOT1.
+// Definition of the instruction class CHANGED from V2/V3 to V4.
+class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ST> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<13> imm13;
+}
+
+// ALU32 Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class ALU32Type<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", ALU32> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> imm16;
+ bits<16> imm16_2;
+}
+
+// ALU64 Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4.
+class ALU64Type<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", ALU64> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> imm16;
+ bits<16> imm16_2;
+}
+
+// M Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
+class MInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", M> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+}
+
+// M Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
+class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, M> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+}
+
+// S Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
+class SInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+//: InstHexagon<outs, ins, asmstr, pattern, cstr, !if(V4T, XTYPE_V4, M)> {
+ : InstHexagon<outs, ins, asmstr, pattern, "", S> {
+// : InstHexagon<outs, ins, asmstr, pattern, "", S> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+}
+
+// S Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
+class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, S> {
+// : InstHexagon<outs, ins, asmstr, pattern, cstr, S> {
+// : InstHexagon<outs, ins, asmstr, pattern, cstr, !if(V4T, XTYPE_V4, S)> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+}
+
+// J Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class JType<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", J> {
+ bits<16> imm16;
+}
+
+// JR Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class JRType<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", JR> {
+ bits<5> rs;
+ bits<5> pu; // Predicate register
+}
+
+// CR Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", CR> {
+ bits<5> rs;
+ bits<10> imm10;
+}
+
+
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", PSEUDO>;
+
+
+//----------------------------------------------------------------------------//
+// Instruction Class Definitions -
+//----------------------------------------------------------------------------//
+
+
+//
+// ALU32 patterns
+//
+class ALU32_rr<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU32Type<outs, ins, asmstr, pattern> {
+}
+
+class ALU32_ir<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU32Type<outs, ins, asmstr, pattern> {
+ let rt{0-4} = 0;
+}
+
+class ALU32_ri<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU32Type<outs, ins, asmstr, pattern> {
+ let rt{0-4} = 0;
+}
+
+class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU32Type<outs, ins, asmstr, pattern> {
+ let rt{0-4} = 0;
+}
+
+//
+// ALU64 patterns.
+//
+class ALU64_rr<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU64Type<outs, ins, asmstr, pattern> {
+}
+
+// J Type Instructions.
+class JInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : JType<outs, ins, asmstr, pattern> {
+}
+
+// JR type Instructions.
+class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : JRType<outs, ins, asmstr, pattern> {
+}
+
+
+// Post increment ST Instruction.
+class STInstPI<dag outs, dag ins, string asmstr, list<dag> pattern, string cstr>
+ : STInstPost<outs, ins, asmstr, pattern, cstr> {
+ let rt{0-4} = 0;
+}
+
+// Post increment LD Instruction.
+class LDInstPI<dag outs, dag ins, string asmstr, list<dag> pattern, string cstr>
+ : LDInstPost<outs, ins, asmstr, pattern, cstr> {
+ let rt{0-4} = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// V4 Instruction Format Definitions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrFormatsV4.td"
+
+//===----------------------------------------------------------------------===//
+// V4 Instruction Format Definitions -
+//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
new file mode 100644
index 0000000..bd5e449
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -0,0 +1,46 @@
+//==- HexagonInstrFormatsV4.td - Hexagon V4 Instruction Formats -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V4 instruction classes in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// NV type instructions.
+//
+class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", NV_V4> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<13> imm13;
+}
+
+// Definition of Post increment new value store.
+class NVInstPost_V4<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, NV_V4> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<13> imm13;
+}
+
+// Post increment ST Instruction.
+class NVInstPI_V4<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : NVInstPost_V4<outs, ins, asmstr, pattern, cstr> {
+ let rt{0-4} = 0;
+}
+
+class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", MEM_V4> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<6> imm6;
+}
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
new file mode 100644
index 0000000..69a50d7
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -0,0 +1,1459 @@
+//=- HexagonInstrInfo.cpp - Hexagon Instruction Information -------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonRegisterInfo.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonSubtarget.h"
+#include "Hexagon.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#define GET_INSTRINFO_CTOR
+#include "HexagonGenInstrInfo.inc"
+
+#include <iostream>
+
+
+using namespace llvm;
+
+///
+/// Constants for Hexagon instructions.
+///
+const int Hexagon_MEMW_OFFSET_MAX = 4095;
+const int Hexagon_MEMW_OFFSET_MIN = -4096;
+const int Hexagon_MEMD_OFFSET_MAX = 8191;
+const int Hexagon_MEMD_OFFSET_MIN = -8192;
+const int Hexagon_MEMH_OFFSET_MAX = 2047;
+const int Hexagon_MEMH_OFFSET_MIN = -2048;
+const int Hexagon_MEMB_OFFSET_MAX = 1023;
+const int Hexagon_MEMB_OFFSET_MIN = -1024;
+const int Hexagon_ADDI_OFFSET_MAX = 32767;
+const int Hexagon_ADDI_OFFSET_MIN = -32768;
+const int Hexagon_MEMD_AUTOINC_MAX = 56;
+const int Hexagon_MEMD_AUTOINC_MIN = -64;
+const int Hexagon_MEMW_AUTOINC_MAX = 28;
+const int Hexagon_MEMW_AUTOINC_MIN = -32;
+const int Hexagon_MEMH_AUTOINC_MAX = 14;
+const int Hexagon_MEMH_AUTOINC_MIN = -16;
+const int Hexagon_MEMB_AUTOINC_MAX = 7;
+const int Hexagon_MEMB_AUTOINC_MIN = -8;
+
+
+
+HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST)
+ : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
+ RI(ST, *this), Subtarget(ST) {
+}
+
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+
+
+ switch (MI->getOpcode()) {
+ case Hexagon::LDriw:
+ case Hexagon::LDrid:
+ case Hexagon::LDrih:
+ case Hexagon::LDrib:
+ case Hexagon::LDriub:
+ if (MI->getOperand(2).isFI() &&
+ MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::STriw:
+ case Hexagon::STrid:
+ case Hexagon::STrih:
+ case Hexagon::STrib:
+ if (MI->getOperand(2).isFI() &&
+ MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+
+unsigned
+HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const{
+
+ int BOpc = Hexagon::JMP;
+ int BccOpc = Hexagon::JMP_Pred;
+
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+ int regPos = 0;
+ // Check if ReverseBranchCondition has asked to reverse this branch
+ // If we want to reverse the branch an odd number of times, we want
+ // JMP_PredNot.
+ if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) {
+ BccOpc = Hexagon::JMP_PredNot;
+ regPos = 1;
+ }
+
+ if (FBB == 0) {
+ if (Cond.empty()) {
+      // Due to a bug in TailMerging/CFG Optimization, we need to handle the
+      // special case of a predicated jump followed by an unconditional jump.
+      // Otherwise, Tail Merging and CFG Optimization go into an infinite loop.
+ MachineBasicBlock *NewTBB, *NewFBB;
+ SmallVector<MachineOperand, 4> Cond;
+ MachineInstr *Term = MBB.getFirstTerminator();
+ if (isPredicated(Term) && !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond,
+ false)) {
+ MachineBasicBlock *NextBB =
+ llvm::next(MachineFunction::iterator(&MBB));
+ if (NewTBB == NextBB) {
+ ReverseBranchCondition(Cond);
+ RemoveBranch(MBB);
+ return InsertBranch(MBB, TBB, 0, Cond, DL);
+ }
+ }
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
+ } else {
+ BuildMI(&MBB, DL,
+ get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
+ }
+ return 1;
+ }
+
+ BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
+
+ return 2;
+}
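A minimal standalone sketch (plain C++, not part of the patch) of how the Cond vector produced by AnalyzeBranch drives the opcode choice above; a leading immediate zero selects the inverted predicate form:

#include <vector>

enum SketchOpcode { JMP_Pred_Sketch, JMP_PredNot_Sketch };

// Mirrors the "Cond[0].isImm() && Cond[0].getImm() == 0" test in InsertBranch,
// with plain ints standing in for MachineOperands.
static SketchOpcode pickBranchOpcode(const std::vector<int> &Cond) {
  return (!Cond.empty() && Cond.front() == 0) ? JMP_PredNot_Sketch
                                              : JMP_Pred_Sketch;
}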
+
+
+bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ FBB = NULL;
+
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+
+  // A basic block may look like this:
+ //
+ // [ insn
+ // EH_LABEL
+ // insn
+ // insn
+ // insn
+ // EH_LABEL
+ // insn ]
+ //
+  // It has two successors but no terminator; we don't know how to
+  // handle it.
+ do {
+ --I;
+ if (I->isEHLabel())
+ return true;
+ } while (I != MBB.begin());
+
+ I = MBB.end();
+ --I;
+
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastInst->getOpcode() == Hexagon::JMP) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ if (LastInst->getOpcode() == Hexagon::JMP_Pred) {
+ // Block ends with fall-through true condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+ if (LastInst->getOpcode() == Hexagon::JMP_PredNot) {
+ // Block ends with fall-through false condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(0));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+  // If the block ends with Hexagon::BRCOND and Hexagon::JMP, handle it.
+ if (((SecondLastInst->getOpcode() == Hexagon::BRCOND) ||
+ (SecondLastInst->getOpcode() == Hexagon::JMP_Pred)) &&
+ LastInst->getOpcode() == Hexagon::JMP) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+  // If the block ends with Hexagon::JMP_PredNot and Hexagon::JMP, handle it.
+ if ((SecondLastInst->getOpcode() == Hexagon::JMP_PredNot) &&
+ LastInst->getOpcode() == Hexagon::JMP) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(0));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+  // If the block ends with two Hexagon::JMPs, handle it. The second one is not
+ // executed, so remove it.
+ if (SecondLastInst->getOpcode() == Hexagon::JMP &&
+ LastInst->getOpcode() == Hexagon::JMP) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+
+unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ int BOpc = Hexagon::JMP;
+ int BccOpc = Hexagon::JMP_Pred;
+ int BccOpcNot = Hexagon::JMP_PredNot;
+
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc &&
+ I->getOpcode() != BccOpcNot)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (I->getOpcode() != BccOpc && I->getOpcode() != BccOpcNot)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+
+void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ if (Hexagon::IntRegsRegClass.contains(SrcReg, DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::TFR), DestReg).addReg(SrcReg);
+ return;
+ }
+ if (Hexagon::DoubleRegsRegClass.contains(SrcReg, DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::TFR_64), DestReg).addReg(SrcReg);
+ return;
+ }
+ if (Hexagon::PredRegsRegClass.contains(SrcReg, DestReg)) {
+ // Map Pd = Ps to Pd = or(Ps, Ps).
+ BuildMI(MBB, I, DL, get(Hexagon::OR_pp),
+ DestReg).addReg(SrcReg).addReg(SrcReg);
+ return;
+ }
+ if (Hexagon::DoubleRegsRegClass.contains(DestReg, SrcReg)) {
+ // We can have an overlap between single and double reg: r1:0 = r0.
+ if(SrcReg == RI.getSubReg(DestReg, Hexagon::subreg_loreg)) {
+ // r1:0 = r0
+ BuildMI(MBB, I, DL, get(Hexagon::TFRI), (RI.getSubReg(DestReg,
+ Hexagon::subreg_hireg))).addImm(0);
+ } else {
+ // r1:0 = r1 or no overlap.
+ BuildMI(MBB, I, DL, get(Hexagon::TFR), (RI.getSubReg(DestReg,
+ Hexagon::subreg_loreg))).addReg(SrcReg);
+ BuildMI(MBB, I, DL, get(Hexagon::TFRI), (RI.getSubReg(DestReg,
+ Hexagon::subreg_hireg))).addImm(0);
+ }
+ return;
+ }
+ if (Hexagon::CRRegsRegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::TFCR), DestReg).addReg(SrcReg);
+ return;
+ }
+
+ assert (0 && "Unimplemented");
+}
+
+
+void HexagonInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+
+ DebugLoc DL = MBB.findDebugLoc(I);
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI),
+ Align);
+
+ if (Hexagon::IntRegsRegisterClass->hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(Hexagon::STriw))
+ .addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ } else if (Hexagon::DoubleRegsRegisterClass->hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(Hexagon::STrid))
+ .addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ } else if (Hexagon::PredRegsRegisterClass->hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(Hexagon::STriw_pred))
+ .addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ } else {
+ assert(0 && "Unimplemented");
+ }
+}
+
+
+void HexagonInstrInfo::storeRegToAddr(
+ MachineFunction &MF, unsigned SrcReg,
+ bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const
+{
+ assert(0 && "Unimplemented");
+ return;
+}
+
+
+void HexagonInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = MBB.findDebugLoc(I);
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI),
+ Align);
+
+ if (RC == Hexagon::IntRegsRegisterClass) {
+ BuildMI(MBB, I, DL, get(Hexagon::LDriw), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ } else if (RC == Hexagon::DoubleRegsRegisterClass) {
+ BuildMI(MBB, I, DL, get(Hexagon::LDrid), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ } else if (RC == Hexagon::PredRegsRegisterClass) {
+ BuildMI(MBB, I, DL, get(Hexagon::LDriw_pred), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ } else {
+    assert(0 && "Can't load this register from stack slot");
+ }
+}
+
+
+void HexagonInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ assert(0 && "Unimplemented");
+}
+
+
+MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FI) const {
+ // Hexagon_TODO: Implement.
+ return(0);
+}
+
+
+unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const {
+
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetRegisterClass *TRC;
+ if (VT == MVT::i1) {
+ TRC = Hexagon::PredRegsRegisterClass;
+ } else if (VT == MVT::i32) {
+ TRC = Hexagon::IntRegsRegisterClass;
+ } else if (VT == MVT::i64) {
+ TRC = Hexagon::DoubleRegsRegisterClass;
+ } else {
+ assert(0 && "Cannot handle this register class");
+ }
+
+ unsigned NewReg = RegInfo.createVirtualRegister(TRC);
+ return NewReg;
+}
+
+
+bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
+ bool isPred = MI->getDesc().isPredicable();
+
+ if (!isPred)
+ return false;
+
+ const int Opc = MI->getOpcode();
+
+ switch(Opc) {
+ case Hexagon::TFRI:
+ return isInt<12>(MI->getOperand(1).getImm());
+
+ case Hexagon::STrid:
+ case Hexagon::STrid_indexed:
+ return isShiftedUInt<6,3>(MI->getOperand(1).getImm());
+
+ case Hexagon::STriw:
+ case Hexagon::STriw_indexed:
+ case Hexagon::STriw_nv_V4:
+ return isShiftedUInt<6,2>(MI->getOperand(1).getImm());
+
+ case Hexagon::STrih:
+ case Hexagon::STrih_indexed:
+ case Hexagon::STrih_nv_V4:
+ return isShiftedUInt<6,1>(MI->getOperand(1).getImm());
+
+ case Hexagon::STrib:
+ case Hexagon::STrib_indexed:
+ case Hexagon::STrib_nv_V4:
+ return isUInt<6>(MI->getOperand(1).getImm());
+
+ case Hexagon::LDrid:
+ case Hexagon::LDrid_indexed:
+ return isShiftedUInt<6,3>(MI->getOperand(2).getImm());
+
+ case Hexagon::LDriw:
+ case Hexagon::LDriw_indexed:
+ return isShiftedUInt<6,2>(MI->getOperand(2).getImm());
+
+ case Hexagon::LDrih:
+ case Hexagon::LDriuh:
+ case Hexagon::LDrih_indexed:
+ case Hexagon::LDriuh_indexed:
+ return isShiftedUInt<6,1>(MI->getOperand(2).getImm());
+
+ case Hexagon::LDrib:
+ case Hexagon::LDriub:
+ case Hexagon::LDrib_indexed:
+ case Hexagon::LDriub_indexed:
+ return isUInt<6>(MI->getOperand(2).getImm());
+
+ case Hexagon::POST_LDrid:
+ return isShiftedInt<4,3>(MI->getOperand(3).getImm());
+
+ case Hexagon::POST_LDriw:
+ return isShiftedInt<4,2>(MI->getOperand(3).getImm());
+
+ case Hexagon::POST_LDrih:
+ case Hexagon::POST_LDriuh:
+ return isShiftedInt<4,1>(MI->getOperand(3).getImm());
+
+ case Hexagon::POST_LDrib:
+ case Hexagon::POST_LDriub:
+ return isInt<4>(MI->getOperand(3).getImm());
+
+ case Hexagon::STrib_imm_V4:
+ case Hexagon::STrih_imm_V4:
+ case Hexagon::STriw_imm_V4:
+ return (isUInt<6>(MI->getOperand(1).getImm()) &&
+ isInt<6>(MI->getOperand(2).getImm()));
+
+ case Hexagon::ADD_ri:
+ return isInt<8>(MI->getOperand(2).getImm());
+
+ case Hexagon::ASLH:
+ case Hexagon::ASRH:
+ case Hexagon::SXTB:
+ case Hexagon::SXTH:
+ case Hexagon::ZXTB:
+ case Hexagon::ZXTH:
+ return Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4;
+
+  case Hexagon::JMPR:
+    return false;
+
+ default:
+ return true;
+ }
+
+ return true;
+}
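The isShiftedUInt/isShiftedInt guards above come from llvm/Support/MathExtras.h. As a rough standalone illustration (an assumed helper, not part of the patch), the <6,2> case used for STriw accepts 4-byte-aligned offsets whose quotient fits in 6 unsigned bits:

#include <cstdint>

// Sketch: equivalent of isShiftedUInt<6,2>(Imm) -- a non-negative multiple of
// 4 no larger than 63 * 4 = 252.
static bool fitsShiftedUInt_6_2(int64_t Imm) {
  return Imm >= 0 && (Imm & 0x3) == 0 && (Imm >> 2) <= 63;
}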
+
+
+int HexagonInstrInfo::
+getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
+ switch(Opc) {
+ case Hexagon::TFR:
+ return !invertPredicate ? Hexagon::TFR_cPt :
+ Hexagon::TFR_cNotPt;
+ case Hexagon::TFRI:
+ return !invertPredicate ? Hexagon::TFRI_cPt :
+ Hexagon::TFRI_cNotPt;
+ case Hexagon::JMP:
+ return !invertPredicate ? Hexagon::JMP_Pred :
+ Hexagon::JMP_PredNot;
+ case Hexagon::ADD_ri:
+ return !invertPredicate ? Hexagon::ADD_ri_cPt :
+ Hexagon::ADD_ri_cNotPt;
+ case Hexagon::ADD_rr:
+ return !invertPredicate ? Hexagon::ADD_rr_cPt :
+ Hexagon::ADD_rr_cNotPt;
+ case Hexagon::XOR_rr:
+ return !invertPredicate ? Hexagon::XOR_rr_cPt :
+ Hexagon::XOR_rr_cNotPt;
+ case Hexagon::AND_rr:
+ return !invertPredicate ? Hexagon::AND_rr_cPt :
+ Hexagon::AND_rr_cNotPt;
+ case Hexagon::OR_rr:
+ return !invertPredicate ? Hexagon::OR_rr_cPt :
+ Hexagon::OR_rr_cNotPt;
+ case Hexagon::SUB_rr:
+ return !invertPredicate ? Hexagon::SUB_rr_cPt :
+ Hexagon::SUB_rr_cNotPt;
+ case Hexagon::COMBINE_rr:
+ return !invertPredicate ? Hexagon::COMBINE_rr_cPt :
+ Hexagon::COMBINE_rr_cNotPt;
+ case Hexagon::ASLH:
+ return !invertPredicate ? Hexagon::ASLH_cPt_V4 :
+ Hexagon::ASLH_cNotPt_V4;
+ case Hexagon::ASRH:
+ return !invertPredicate ? Hexagon::ASRH_cPt_V4 :
+ Hexagon::ASRH_cNotPt_V4;
+ case Hexagon::SXTB:
+ return !invertPredicate ? Hexagon::SXTB_cPt_V4 :
+ Hexagon::SXTB_cNotPt_V4;
+ case Hexagon::SXTH:
+ return !invertPredicate ? Hexagon::SXTH_cPt_V4 :
+ Hexagon::SXTH_cNotPt_V4;
+ case Hexagon::ZXTB:
+ return !invertPredicate ? Hexagon::ZXTB_cPt_V4 :
+ Hexagon::ZXTB_cNotPt_V4;
+ case Hexagon::ZXTH:
+ return !invertPredicate ? Hexagon::ZXTH_cPt_V4 :
+ Hexagon::ZXTH_cNotPt_V4;
+
+ case Hexagon::JMPR:
+ return !invertPredicate ? Hexagon::JMPR_cPt :
+ Hexagon::JMPR_cNotPt;
+
+ // V4 indexed+scaled load.
+ case Hexagon::LDrid_indexed_V4:
+ return !invertPredicate ? Hexagon::LDrid_indexed_cPt_V4 :
+ Hexagon::LDrid_indexed_cNotPt_V4;
+ case Hexagon::LDrid_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDrid_indexed_shl_cPt_V4 :
+ Hexagon::LDrid_indexed_shl_cNotPt_V4;
+ case Hexagon::LDrib_indexed_V4:
+ return !invertPredicate ? Hexagon::LDrib_indexed_cPt_V4 :
+ Hexagon::LDrib_indexed_cNotPt_V4;
+ case Hexagon::LDriub_indexed_V4:
+ return !invertPredicate ? Hexagon::LDriub_indexed_cPt_V4 :
+ Hexagon::LDriub_indexed_cNotPt_V4;
+ case Hexagon::LDriub_ae_indexed_V4:
+ return !invertPredicate ? Hexagon::LDriub_indexed_cPt_V4 :
+ Hexagon::LDriub_indexed_cNotPt_V4;
+ case Hexagon::LDrib_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDrib_indexed_shl_cPt_V4 :
+ Hexagon::LDrib_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriub_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDriub_indexed_shl_cPt_V4 :
+ Hexagon::LDriub_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriub_ae_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDriub_indexed_shl_cPt_V4 :
+ Hexagon::LDriub_indexed_shl_cNotPt_V4;
+ case Hexagon::LDrih_indexed_V4:
+ return !invertPredicate ? Hexagon::LDrih_indexed_cPt_V4 :
+ Hexagon::LDrih_indexed_cNotPt_V4;
+ case Hexagon::LDriuh_indexed_V4:
+ return !invertPredicate ? Hexagon::LDriuh_indexed_cPt_V4 :
+ Hexagon::LDriuh_indexed_cNotPt_V4;
+ case Hexagon::LDriuh_ae_indexed_V4:
+ return !invertPredicate ? Hexagon::LDriuh_indexed_cPt_V4 :
+ Hexagon::LDriuh_indexed_cNotPt_V4;
+ case Hexagon::LDrih_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDrih_indexed_shl_cPt_V4 :
+ Hexagon::LDrih_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriuh_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDriuh_indexed_shl_cPt_V4 :
+ Hexagon::LDriuh_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriuh_ae_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDriuh_indexed_shl_cPt_V4 :
+ Hexagon::LDriuh_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriw_indexed_V4:
+ return !invertPredicate ? Hexagon::LDriw_indexed_cPt_V4 :
+ Hexagon::LDriw_indexed_cNotPt_V4;
+ case Hexagon::LDriw_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDriw_indexed_shl_cPt_V4 :
+ Hexagon::LDriw_indexed_shl_cNotPt_V4;
+ // Byte.
+ case Hexagon::POST_STbri:
+ return !invertPredicate ? Hexagon::POST_STbri_cPt :
+ Hexagon::POST_STbri_cNotPt;
+ case Hexagon::STrib:
+ return !invertPredicate ? Hexagon::STrib_cPt :
+ Hexagon::STrib_cNotPt;
+ case Hexagon::STrib_indexed:
+ return !invertPredicate ? Hexagon::STrib_indexed_cPt :
+ Hexagon::STrib_indexed_cNotPt;
+ case Hexagon::STrib_imm_V4:
+ return !invertPredicate ? Hexagon::STrib_imm_cPt_V4 :
+ Hexagon::STrib_imm_cNotPt_V4;
+ case Hexagon::STrib_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::STrib_indexed_shl_cPt_V4 :
+ Hexagon::STrib_indexed_shl_cNotPt_V4;
+ // Halfword.
+ case Hexagon::POST_SThri:
+ return !invertPredicate ? Hexagon::POST_SThri_cPt :
+ Hexagon::POST_SThri_cNotPt;
+ case Hexagon::STrih:
+ return !invertPredicate ? Hexagon::STrih_cPt :
+ Hexagon::STrih_cNotPt;
+ case Hexagon::STrih_indexed:
+ return !invertPredicate ? Hexagon::STrih_indexed_cPt :
+ Hexagon::STrih_indexed_cNotPt;
+ case Hexagon::STrih_imm_V4:
+ return !invertPredicate ? Hexagon::STrih_imm_cPt_V4 :
+ Hexagon::STrih_imm_cNotPt_V4;
+ case Hexagon::STrih_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::STrih_indexed_shl_cPt_V4 :
+ Hexagon::STrih_indexed_shl_cNotPt_V4;
+ // Word.
+ case Hexagon::POST_STwri:
+ return !invertPredicate ? Hexagon::POST_STwri_cPt :
+ Hexagon::POST_STwri_cNotPt;
+ case Hexagon::STriw:
+ return !invertPredicate ? Hexagon::STriw_cPt :
+ Hexagon::STriw_cNotPt;
+ case Hexagon::STriw_indexed:
+ return !invertPredicate ? Hexagon::STriw_indexed_cPt :
+ Hexagon::STriw_indexed_cNotPt;
+ case Hexagon::STriw_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::STriw_indexed_shl_cPt_V4 :
+ Hexagon::STriw_indexed_shl_cNotPt_V4;
+ case Hexagon::STriw_imm_V4:
+ return !invertPredicate ? Hexagon::STriw_imm_cPt_V4 :
+ Hexagon::STriw_imm_cNotPt_V4;
+ // Double word.
+ case Hexagon::POST_STdri:
+ return !invertPredicate ? Hexagon::POST_STdri_cPt :
+ Hexagon::POST_STdri_cNotPt;
+ case Hexagon::STrid:
+ return !invertPredicate ? Hexagon::STrid_cPt :
+ Hexagon::STrid_cNotPt;
+ case Hexagon::STrid_indexed:
+ return !invertPredicate ? Hexagon::STrid_indexed_cPt :
+ Hexagon::STrid_indexed_cNotPt;
+ case Hexagon::STrid_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::STrid_indexed_shl_cPt_V4 :
+ Hexagon::STrid_indexed_shl_cNotPt_V4;
+ // Load.
+ case Hexagon::LDrid:
+ return !invertPredicate ? Hexagon::LDrid_cPt :
+ Hexagon::LDrid_cNotPt;
+ case Hexagon::LDriw:
+ return !invertPredicate ? Hexagon::LDriw_cPt :
+ Hexagon::LDriw_cNotPt;
+ case Hexagon::LDrih:
+ return !invertPredicate ? Hexagon::LDrih_cPt :
+ Hexagon::LDrih_cNotPt;
+ case Hexagon::LDriuh:
+ return !invertPredicate ? Hexagon::LDriuh_cPt :
+ Hexagon::LDriuh_cNotPt;
+ case Hexagon::LDrib:
+ return !invertPredicate ? Hexagon::LDrib_cPt :
+ Hexagon::LDrib_cNotPt;
+ case Hexagon::LDriub:
+ return !invertPredicate ? Hexagon::LDriub_cPt :
+ Hexagon::LDriub_cNotPt;
+ case Hexagon::LDriubit:
+ return !invertPredicate ? Hexagon::LDriub_cPt :
+ Hexagon::LDriub_cNotPt;
+ // Load Indexed.
+ case Hexagon::LDrid_indexed:
+ return !invertPredicate ? Hexagon::LDrid_indexed_cPt :
+ Hexagon::LDrid_indexed_cNotPt;
+ case Hexagon::LDriw_indexed:
+ return !invertPredicate ? Hexagon::LDriw_indexed_cPt :
+ Hexagon::LDriw_indexed_cNotPt;
+ case Hexagon::LDrih_indexed:
+ return !invertPredicate ? Hexagon::LDrih_indexed_cPt :
+ Hexagon::LDrih_indexed_cNotPt;
+ case Hexagon::LDriuh_indexed:
+ return !invertPredicate ? Hexagon::LDriuh_indexed_cPt :
+ Hexagon::LDriuh_indexed_cNotPt;
+ case Hexagon::LDrib_indexed:
+ return !invertPredicate ? Hexagon::LDrib_indexed_cPt :
+ Hexagon::LDrib_indexed_cNotPt;
+ case Hexagon::LDriub_indexed:
+ return !invertPredicate ? Hexagon::LDriub_indexed_cPt :
+ Hexagon::LDriub_indexed_cNotPt;
+ // Post Increment Load.
+ case Hexagon::POST_LDrid:
+ return !invertPredicate ? Hexagon::POST_LDrid_cPt :
+ Hexagon::POST_LDrid_cNotPt;
+ case Hexagon::POST_LDriw:
+ return !invertPredicate ? Hexagon::POST_LDriw_cPt :
+ Hexagon::POST_LDriw_cNotPt;
+ case Hexagon::POST_LDrih:
+ return !invertPredicate ? Hexagon::POST_LDrih_cPt :
+ Hexagon::POST_LDrih_cNotPt;
+ case Hexagon::POST_LDriuh:
+ return !invertPredicate ? Hexagon::POST_LDriuh_cPt :
+ Hexagon::POST_LDriuh_cNotPt;
+ case Hexagon::POST_LDrib:
+ return !invertPredicate ? Hexagon::POST_LDrib_cPt :
+ Hexagon::POST_LDrib_cNotPt;
+ case Hexagon::POST_LDriub:
+ return !invertPredicate ? Hexagon::POST_LDriub_cPt :
+ Hexagon::POST_LDriub_cNotPt;
+ // DEALLOC_RETURN.
+ case Hexagon::DEALLOC_RET_V4:
+ return !invertPredicate ? Hexagon::DEALLOC_RET_cPt_V4 :
+ Hexagon::DEALLOC_RET_cNotPt_V4;
+ default:
+ assert(false && "Unexpected predicable instruction");
+ }
+}
+
+
+bool HexagonInstrInfo::
+PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+ int Opc = MI->getOpcode();
+ assert (isPredicable(MI) && "Expected predicable instruction");
+ bool invertJump = (!Cond.empty() && Cond[0].isImm() &&
+ (Cond[0].getImm() == 0));
+ MI->setDesc(get(getMatchingCondBranchOpcode(Opc, invertJump)));
+ //
+ // This assumes that the predicate is always the first operand
+ // in the set of inputs.
+ //
+ MI->addOperand(MI->getOperand(MI->getNumOperands()-1));
+ int oper;
+ for (oper = MI->getNumOperands() - 3; oper >= 0; --oper) {
+ MachineOperand MO = MI->getOperand(oper);
+ if ((MO.isReg() && !MO.isUse() && !MO.isImplicit())) {
+ break;
+ }
+
+ if (MO.isReg()) {
+ MI->getOperand(oper+1).ChangeToRegister(MO.getReg(), MO.isDef(),
+ MO.isImplicit(), MO.isKill(),
+ MO.isDead(), MO.isUndef(),
+ MO.isDebug());
+ } else if (MO.isImm()) {
+ MI->getOperand(oper+1).ChangeToImmediate(MO.getImm());
+ } else {
+ assert(false && "Unexpected operand type");
+ }
+ }
+
+ int regPos = invertJump ? 1 : 0;
+ MachineOperand PredMO = Cond[regPos];
+ MI->getOperand(oper+1).ChangeToRegister(PredMO.getReg(), PredMO.isDef(),
+ PredMO.isImplicit(), PredMO.isKill(),
+ PredMO.isDead(), PredMO.isUndef(),
+ PredMO.isDebug());
+
+ return true;
+}
+
+
+bool
+HexagonInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCyles,
+ unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const {
+ return true;
+}
+
+
+bool
+HexagonInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles,
+ unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles,
+ unsigned ExtraFCycles,
+ const BranchProbability &Probability) const {
+ return true;
+}
+
+
+bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::TFR_cPt:
+ case Hexagon::TFR_cNotPt:
+ case Hexagon::TFRI_cPt:
+ case Hexagon::TFRI_cNotPt:
+ case Hexagon::TFR_cdnPt:
+ case Hexagon::TFR_cdnNotPt:
+ case Hexagon::TFRI_cdnPt:
+ case Hexagon::TFRI_cdnNotPt:
+ return true;
+
+ case Hexagon::JMP_Pred:
+ case Hexagon::JMP_PredNot:
+ case Hexagon::BRCOND:
+ case Hexagon::JMP_PredPt:
+ case Hexagon::JMP_PredNotPt:
+ case Hexagon::JMP_PredPnt:
+ case Hexagon::JMP_PredNotPnt:
+ return true;
+
+ case Hexagon::LDrid_indexed_cPt_V4 :
+ case Hexagon::LDrid_indexed_cdnPt_V4 :
+ case Hexagon::LDrid_indexed_cNotPt_V4 :
+ case Hexagon::LDrid_indexed_cdnNotPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDrib_indexed_cPt_V4 :
+ case Hexagon::LDrib_indexed_cdnPt_V4 :
+ case Hexagon::LDrib_indexed_cNotPt_V4 :
+ case Hexagon::LDrib_indexed_cdnNotPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriub_indexed_cPt_V4 :
+ case Hexagon::LDriub_indexed_cdnPt_V4 :
+ case Hexagon::LDriub_indexed_cNotPt_V4 :
+ case Hexagon::LDriub_indexed_cdnNotPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDrih_indexed_cPt_V4 :
+ case Hexagon::LDrih_indexed_cdnPt_V4 :
+ case Hexagon::LDrih_indexed_cNotPt_V4 :
+ case Hexagon::LDrih_indexed_cdnNotPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriuh_indexed_cPt_V4 :
+ case Hexagon::LDriuh_indexed_cdnPt_V4 :
+ case Hexagon::LDriuh_indexed_cNotPt_V4 :
+ case Hexagon::LDriuh_indexed_cdnNotPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriw_indexed_cPt_V4 :
+ case Hexagon::LDriw_indexed_cdnPt_V4 :
+ case Hexagon::LDriw_indexed_cNotPt_V4 :
+ case Hexagon::LDriw_indexed_cdnNotPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 :
+ return true;
+
+ case Hexagon::LDrid_cPt :
+ case Hexagon::LDrid_cNotPt :
+ case Hexagon::LDrid_indexed_cPt :
+ case Hexagon::LDrid_indexed_cNotPt :
+ case Hexagon::POST_LDrid_cPt :
+ case Hexagon::POST_LDrid_cNotPt :
+ case Hexagon::LDriw_cPt :
+ case Hexagon::LDriw_cNotPt :
+ case Hexagon::LDriw_indexed_cPt :
+ case Hexagon::LDriw_indexed_cNotPt :
+ case Hexagon::POST_LDriw_cPt :
+ case Hexagon::POST_LDriw_cNotPt :
+ case Hexagon::LDrih_cPt :
+ case Hexagon::LDrih_cNotPt :
+ case Hexagon::LDrih_indexed_cPt :
+ case Hexagon::LDrih_indexed_cNotPt :
+ case Hexagon::POST_LDrih_cPt :
+ case Hexagon::POST_LDrih_cNotPt :
+ case Hexagon::LDrib_cPt :
+ case Hexagon::LDrib_cNotPt :
+ case Hexagon::LDrib_indexed_cPt :
+ case Hexagon::LDrib_indexed_cNotPt :
+ case Hexagon::POST_LDrib_cPt :
+ case Hexagon::POST_LDrib_cNotPt :
+ case Hexagon::LDriuh_cPt :
+ case Hexagon::LDriuh_cNotPt :
+ case Hexagon::LDriuh_indexed_cPt :
+ case Hexagon::LDriuh_indexed_cNotPt :
+ case Hexagon::POST_LDriuh_cPt :
+ case Hexagon::POST_LDriuh_cNotPt :
+ case Hexagon::LDriub_cPt :
+ case Hexagon::LDriub_cNotPt :
+ case Hexagon::LDriub_indexed_cPt :
+ case Hexagon::LDriub_indexed_cNotPt :
+ case Hexagon::POST_LDriub_cPt :
+ case Hexagon::POST_LDriub_cNotPt :
+ return true;
+
+ case Hexagon::LDrid_cdnPt :
+ case Hexagon::LDrid_cdnNotPt :
+ case Hexagon::LDrid_indexed_cdnPt :
+ case Hexagon::LDrid_indexed_cdnNotPt :
+ case Hexagon::POST_LDrid_cdnPt_V4 :
+ case Hexagon::POST_LDrid_cdnNotPt_V4 :
+ case Hexagon::LDriw_cdnPt :
+ case Hexagon::LDriw_cdnNotPt :
+ case Hexagon::LDriw_indexed_cdnPt :
+ case Hexagon::LDriw_indexed_cdnNotPt :
+ case Hexagon::POST_LDriw_cdnPt_V4 :
+ case Hexagon::POST_LDriw_cdnNotPt_V4 :
+ case Hexagon::LDrih_cdnPt :
+ case Hexagon::LDrih_cdnNotPt :
+ case Hexagon::LDrih_indexed_cdnPt :
+ case Hexagon::LDrih_indexed_cdnNotPt :
+ case Hexagon::POST_LDrih_cdnPt_V4 :
+ case Hexagon::POST_LDrih_cdnNotPt_V4 :
+ case Hexagon::LDrib_cdnPt :
+ case Hexagon::LDrib_cdnNotPt :
+ case Hexagon::LDrib_indexed_cdnPt :
+ case Hexagon::LDrib_indexed_cdnNotPt :
+ case Hexagon::POST_LDrib_cdnPt_V4 :
+ case Hexagon::POST_LDrib_cdnNotPt_V4 :
+ case Hexagon::LDriuh_cdnPt :
+ case Hexagon::LDriuh_cdnNotPt :
+ case Hexagon::LDriuh_indexed_cdnPt :
+ case Hexagon::LDriuh_indexed_cdnNotPt :
+ case Hexagon::POST_LDriuh_cdnPt_V4 :
+ case Hexagon::POST_LDriuh_cdnNotPt_V4 :
+ case Hexagon::LDriub_cdnPt :
+ case Hexagon::LDriub_cdnNotPt :
+ case Hexagon::LDriub_indexed_cdnPt :
+ case Hexagon::LDriub_indexed_cdnNotPt :
+ case Hexagon::POST_LDriub_cdnPt_V4 :
+ case Hexagon::POST_LDriub_cdnNotPt_V4 :
+ return true;
+
+ case Hexagon::ADD_ri_cPt:
+ case Hexagon::ADD_ri_cNotPt:
+ case Hexagon::ADD_ri_cdnPt:
+ case Hexagon::ADD_ri_cdnNotPt:
+ case Hexagon::ADD_rr_cPt:
+ case Hexagon::ADD_rr_cNotPt:
+ case Hexagon::ADD_rr_cdnPt:
+ case Hexagon::ADD_rr_cdnNotPt:
+ case Hexagon::XOR_rr_cPt:
+ case Hexagon::XOR_rr_cNotPt:
+ case Hexagon::XOR_rr_cdnPt:
+ case Hexagon::XOR_rr_cdnNotPt:
+ case Hexagon::AND_rr_cPt:
+ case Hexagon::AND_rr_cNotPt:
+ case Hexagon::AND_rr_cdnPt:
+ case Hexagon::AND_rr_cdnNotPt:
+ case Hexagon::OR_rr_cPt:
+ case Hexagon::OR_rr_cNotPt:
+ case Hexagon::OR_rr_cdnPt:
+ case Hexagon::OR_rr_cdnNotPt:
+ case Hexagon::SUB_rr_cPt:
+ case Hexagon::SUB_rr_cNotPt:
+ case Hexagon::SUB_rr_cdnPt:
+ case Hexagon::SUB_rr_cdnNotPt:
+ case Hexagon::COMBINE_rr_cPt:
+ case Hexagon::COMBINE_rr_cNotPt:
+ case Hexagon::COMBINE_rr_cdnPt:
+ case Hexagon::COMBINE_rr_cdnNotPt:
+ return true;
+
+ case Hexagon::ASLH_cPt_V4:
+ case Hexagon::ASLH_cNotPt_V4:
+ case Hexagon::ASRH_cPt_V4:
+ case Hexagon::ASRH_cNotPt_V4:
+ case Hexagon::SXTB_cPt_V4:
+ case Hexagon::SXTB_cNotPt_V4:
+ case Hexagon::SXTH_cPt_V4:
+ case Hexagon::SXTH_cNotPt_V4:
+ case Hexagon::ZXTB_cPt_V4:
+ case Hexagon::ZXTB_cNotPt_V4:
+ case Hexagon::ZXTH_cPt_V4:
+ case Hexagon::ZXTH_cNotPt_V4:
+ return true;
+
+ case Hexagon::ASLH_cdnPt_V4:
+ case Hexagon::ASLH_cdnNotPt_V4:
+ case Hexagon::ASRH_cdnPt_V4:
+ case Hexagon::ASRH_cdnNotPt_V4:
+ case Hexagon::SXTB_cdnPt_V4:
+ case Hexagon::SXTB_cdnNotPt_V4:
+ case Hexagon::SXTH_cdnPt_V4:
+ case Hexagon::SXTH_cdnNotPt_V4:
+ case Hexagon::ZXTB_cdnPt_V4:
+ case Hexagon::ZXTB_cdnNotPt_V4:
+ case Hexagon::ZXTH_cdnPt_V4:
+ case Hexagon::ZXTH_cdnNotPt_V4:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+
+bool
+HexagonInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ for (unsigned oper = 0; oper < MI->getNumOperands(); ++oper) {
+ MachineOperand MO = MI->getOperand(oper);
+ if (MO.isReg() && MO.isDef()) {
+ const TargetRegisterClass* RC = RI.getMinimalPhysRegClass(MO.getReg());
+ if (RC == Hexagon::PredRegsRegisterClass) {
+ Pred.push_back(MO);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+
+bool
+HexagonInstrInfo::
+SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const {
+ // TODO: Fix this
+ return false;
+}
+
+
+//
+// We indicate that we want to reverse the branch by
+// inserting a 0 at the beginning of the Cond vector.
+//
+bool HexagonInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) {
+ Cond.erase(Cond.begin());
+ } else {
+ Cond.insert(Cond.begin(), MachineOperand::CreateImm(0));
+ }
+ return false;
+}
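A small standalone sketch (plain C++, not part of the patch) showing that the marker scheme above is an involution: reversing twice restores the original condition vector.

#include <cassert>
#include <vector>

// Ints stand in for MachineOperands; 0 plays the role of the
// MachineOperand::CreateImm(0) marker inserted above.
static void reverseSketch(std::vector<int> &Cond) {
  if (!Cond.empty() && Cond.front() == 0)
    Cond.erase(Cond.begin());
  else
    Cond.insert(Cond.begin(), 0);
}

static void checkInvolution() {
  std::vector<int> Cond;
  Cond.push_back(42);          // the predicate register
  reverseSketch(Cond);         // {0, 42}: branch on !pred
  reverseSketch(Cond);         // back to {42}
  assert(Cond.size() == 1 && Cond[0] == 42);
}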
+
+
+bool HexagonInstrInfo::
+isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumInstrs,
+ const BranchProbability &Probability) const {
+ return (NumInstrs <= 4);
+}
+
+bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::DEALLOC_RET_V4 :
+ case Hexagon::DEALLOC_RET_cPt_V4 :
+ case Hexagon::DEALLOC_RET_cNotPt_V4 :
+ case Hexagon::DEALLOC_RET_cdnPnt_V4 :
+ case Hexagon::DEALLOC_RET_cNotdnPnt_V4 :
+ case Hexagon::DEALLOC_RET_cdnPt_V4 :
+ case Hexagon::DEALLOC_RET_cNotdnPt_V4 :
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::
+isValidOffset(const int Opcode, const int Offset) const {
+  // This function checks whether "Offset" is within the valid range for the
+  // given "Opcode". If it is not, an "ADD_ri" is inserted to compute the final
+  // address; because of that, this function assumes that "Offset" already has
+  // the correct alignment.
+
+ switch(Opcode) {
+
+ case Hexagon::LDriw:
+ case Hexagon::STriw:
+ case Hexagon::STriwt:
+ assert((Offset % 4 == 0) && "Offset has incorrect alignment");
+ return (Offset >= Hexagon_MEMW_OFFSET_MIN) &&
+ (Offset <= Hexagon_MEMW_OFFSET_MAX);
+
+ case Hexagon::LDrid:
+ case Hexagon::STrid:
+ assert((Offset % 8 == 0) && "Offset has incorrect alignment");
+ return (Offset >= Hexagon_MEMD_OFFSET_MIN) &&
+ (Offset <= Hexagon_MEMD_OFFSET_MAX);
+
+ case Hexagon::LDrih:
+ case Hexagon::LDriuh:
+ case Hexagon::STrih:
+ case Hexagon::LDrih_ae:
+ assert((Offset % 2 == 0) && "Offset has incorrect alignment");
+ return (Offset >= Hexagon_MEMH_OFFSET_MIN) &&
+ (Offset <= Hexagon_MEMH_OFFSET_MAX);
+
+ case Hexagon::LDrib:
+ case Hexagon::STrib:
+ case Hexagon::LDriub:
+ case Hexagon::LDriubit:
+ case Hexagon::LDrib_ae:
+ case Hexagon::LDriub_ae:
+ return (Offset >= Hexagon_MEMB_OFFSET_MIN) &&
+ (Offset <= Hexagon_MEMB_OFFSET_MAX);
+
+ case Hexagon::ADD_ri:
+ case Hexagon::TFR_FI:
+ return (Offset >= Hexagon_ADDI_OFFSET_MIN) &&
+ (Offset <= Hexagon_ADDI_OFFSET_MAX);
+
+ case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMw_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMw_ADDi_MEM_V4 :
+ case Hexagon::MEMw_SUBi_MEM_V4 :
+ case Hexagon::MEMw_ADDr_MEM_V4 :
+ case Hexagon::MEMw_SUBr_MEM_V4 :
+ case Hexagon::MEMw_ANDr_MEM_V4 :
+ case Hexagon::MEMw_ORr_MEM_V4 :
+ assert ((Offset % 4) == 0 && "MEMOPw offset is not aligned correctly." );
+ return (0 <= Offset && Offset <= 255);
+
+ case Hexagon::MEMh_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMh_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMh_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMh_ADDi_MEM_V4 :
+ case Hexagon::MEMh_SUBi_MEM_V4 :
+ case Hexagon::MEMh_ADDr_MEM_V4 :
+ case Hexagon::MEMh_SUBr_MEM_V4 :
+ case Hexagon::MEMh_ANDr_MEM_V4 :
+ case Hexagon::MEMh_ORr_MEM_V4 :
+ assert ((Offset % 2) == 0 && "MEMOPh offset is not aligned correctly." );
+ return (0 <= Offset && Offset <= 127);
+
+ case Hexagon::MEMb_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMb_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMb_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMb_ADDi_MEM_V4 :
+ case Hexagon::MEMb_SUBi_MEM_V4 :
+ case Hexagon::MEMb_ADDr_MEM_V4 :
+ case Hexagon::MEMb_SUBr_MEM_V4 :
+ case Hexagon::MEMb_ANDr_MEM_V4 :
+ case Hexagon::MEMb_ORr_MEM_V4 :
+ return (0 <= Offset && Offset <= 63);
+
+  // LDriw_pred and STriw_pred are pseudo operations, so they have to accept an
+  // offset of any size. A later pass knows how to handle them.
+ case Hexagon::STriw_pred:
+ case Hexagon::LDriw_pred:
+ return true;
+
+ // INLINEASM is very special.
+ case Hexagon::INLINEASM:
+ return true;
+ }
+
+  assert(0 && "No offset range is defined for this opcode. "
+              "Please define it in the above switch statement!");
+  return false;
+}
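As a quick standalone illustration (not part of the patch) of the bounds used above, the LDriw/STriw case reduces to a signed range check against the *_OFFSET constants defined at the top of this file:

#include <cassert>

// Sketch of the word-sized case: Hexagon_MEMW_OFFSET_MIN = -4096,
// Hexagon_MEMW_OFFSET_MAX = 4095, and the offset must be 4-byte aligned.
static bool isValidWordOffsetSketch(int Offset) {
  assert((Offset % 4) == 0 && "Offset has incorrect alignment");
  return Offset >= -4096 && Offset <= 4095;
}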
+
+
+//
+// Check if the Offset is a valid auto-inc imm by Load/Store Type.
+//
+bool HexagonInstrInfo::
+isValidAutoIncImm(const EVT VT, const int Offset) const {
+
+ if (VT == MVT::i64) {
+ return (Offset >= Hexagon_MEMD_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMD_AUTOINC_MAX &&
+ (Offset & 0x7) == 0);
+ }
+ if (VT == MVT::i32) {
+ return (Offset >= Hexagon_MEMW_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMW_AUTOINC_MAX &&
+ (Offset & 0x3) == 0);
+ }
+ if (VT == MVT::i16) {
+ return (Offset >= Hexagon_MEMH_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMH_AUTOINC_MAX &&
+ (Offset & 0x1) == 0);
+ }
+ if (VT == MVT::i8) {
+ return (Offset >= Hexagon_MEMB_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMB_AUTOINC_MAX);
+ }
+
+ assert(0 && "Not an auto-inc opc!");
+
+ return false;
+}
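Similarly, a standalone sketch (not part of the patch) of the MVT::i64 case above: a valid double-word auto-increment is an 8-byte-aligned value between Hexagon_MEMD_AUTOINC_MIN and Hexagon_MEMD_AUTOINC_MAX, i.e. in [-64, 56].

// Sketch of the 64-bit auto-increment check.
static bool isValidDoubleAutoIncSketch(int Offset) {
  return Offset >= -64 && Offset <= 56 && (Offset & 0x7) == 0;
}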
+
+
+bool HexagonInstrInfo::
+isMemOp(const MachineInstr *MI) const {
+ switch (MI->getOpcode())
+ {
+ case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMw_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMw_ADDi_MEM_V4 :
+ case Hexagon::MEMw_SUBi_MEM_V4 :
+ case Hexagon::MEMw_ADDr_MEM_V4 :
+ case Hexagon::MEMw_SUBr_MEM_V4 :
+ case Hexagon::MEMw_ANDr_MEM_V4 :
+ case Hexagon::MEMw_ORr_MEM_V4 :
+ case Hexagon::MEMh_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMh_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMh_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMh_ADDi_MEM_V4 :
+ case Hexagon::MEMh_SUBi_MEM_V4 :
+ case Hexagon::MEMh_ADDr_MEM_V4 :
+ case Hexagon::MEMh_SUBr_MEM_V4 :
+ case Hexagon::MEMh_ANDr_MEM_V4 :
+ case Hexagon::MEMh_ORr_MEM_V4 :
+ case Hexagon::MEMb_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMb_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMb_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMb_ADDi_MEM_V4 :
+ case Hexagon::MEMb_SUBi_MEM_V4 :
+ case Hexagon::MEMb_ADDr_MEM_V4 :
+ case Hexagon::MEMb_SUBr_MEM_V4 :
+ case Hexagon::MEMb_ANDr_MEM_V4 :
+ case Hexagon::MEMb_ORr_MEM_V4 :
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::
+isSpillPredRegOp(const MachineInstr *MI) const {
+ switch (MI->getOpcode())
+ {
+ case Hexagon::STriw_pred :
+ case Hexagon::LDriw_pred :
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const {
+ const HexagonRegisterInfo& QRI = getRegisterInfo();
+ switch (MI->getOpcode())
+ {
+ case Hexagon::ADD_ri_cPt:
+ case Hexagon::ADD_ri_cNotPt:
+ case Hexagon::ADD_rr_cPt:
+ case Hexagon::ADD_rr_cNotPt:
+ case Hexagon::XOR_rr_cPt:
+ case Hexagon::XOR_rr_cNotPt:
+ case Hexagon::AND_rr_cPt:
+ case Hexagon::AND_rr_cNotPt:
+ case Hexagon::OR_rr_cPt:
+ case Hexagon::OR_rr_cNotPt:
+ case Hexagon::SUB_rr_cPt:
+ case Hexagon::SUB_rr_cNotPt:
+ case Hexagon::COMBINE_rr_cPt:
+ case Hexagon::COMBINE_rr_cNotPt:
+ return true;
+ case Hexagon::ASLH_cPt_V4:
+ case Hexagon::ASLH_cNotPt_V4:
+ case Hexagon::ASRH_cPt_V4:
+ case Hexagon::ASRH_cNotPt_V4:
+ case Hexagon::SXTB_cPt_V4:
+ case Hexagon::SXTB_cNotPt_V4:
+ case Hexagon::SXTH_cPt_V4:
+ case Hexagon::SXTH_cNotPt_V4:
+ case Hexagon::ZXTB_cPt_V4:
+ case Hexagon::ZXTB_cNotPt_V4:
+ case Hexagon::ZXTH_cPt_V4:
+ case Hexagon::ZXTH_cNotPt_V4:
+ return QRI.Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4;
+
+ default:
+ return false;
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::
+isConditionalLoad (const MachineInstr* MI) const {
+ const HexagonRegisterInfo& QRI = getRegisterInfo();
+ switch (MI->getOpcode())
+ {
+ case Hexagon::LDrid_cPt :
+ case Hexagon::LDrid_cNotPt :
+ case Hexagon::LDrid_indexed_cPt :
+ case Hexagon::LDrid_indexed_cNotPt :
+ case Hexagon::LDriw_cPt :
+ case Hexagon::LDriw_cNotPt :
+ case Hexagon::LDriw_indexed_cPt :
+ case Hexagon::LDriw_indexed_cNotPt :
+ case Hexagon::LDrih_cPt :
+ case Hexagon::LDrih_cNotPt :
+ case Hexagon::LDrih_indexed_cPt :
+ case Hexagon::LDrih_indexed_cNotPt :
+ case Hexagon::LDrib_cPt :
+ case Hexagon::LDrib_cNotPt :
+ case Hexagon::LDrib_indexed_cPt :
+ case Hexagon::LDrib_indexed_cNotPt :
+ case Hexagon::LDriuh_cPt :
+ case Hexagon::LDriuh_cNotPt :
+ case Hexagon::LDriuh_indexed_cPt :
+ case Hexagon::LDriuh_indexed_cNotPt :
+ case Hexagon::LDriub_cPt :
+ case Hexagon::LDriub_cNotPt :
+ case Hexagon::LDriub_indexed_cPt :
+ case Hexagon::LDriub_indexed_cNotPt :
+ return true;
+ case Hexagon::POST_LDrid_cPt :
+ case Hexagon::POST_LDrid_cNotPt :
+ case Hexagon::POST_LDriw_cPt :
+ case Hexagon::POST_LDriw_cNotPt :
+ case Hexagon::POST_LDrih_cPt :
+ case Hexagon::POST_LDrih_cNotPt :
+ case Hexagon::POST_LDrib_cPt :
+ case Hexagon::POST_LDrib_cNotPt :
+ case Hexagon::POST_LDriuh_cPt :
+ case Hexagon::POST_LDriuh_cNotPt :
+ case Hexagon::POST_LDriub_cPt :
+ case Hexagon::POST_LDriub_cNotPt :
+ return QRI.Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4;
+ case Hexagon::LDrid_indexed_cPt_V4 :
+ case Hexagon::LDrid_indexed_cNotPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrib_indexed_cPt_V4 :
+ case Hexagon::LDrib_indexed_cNotPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriub_indexed_cPt_V4 :
+ case Hexagon::LDriub_indexed_cNotPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrih_indexed_cPt_V4 :
+ case Hexagon::LDrih_indexed_cNotPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriuh_indexed_cPt_V4 :
+ case Hexagon::LDriuh_indexed_cNotPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriw_indexed_cPt_V4 :
+ case Hexagon::LDriw_indexed_cNotPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cNotPt_V4 :
+ return QRI.Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4;
+ default:
+ return false;
+ }
+ return false;
+}
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
new file mode 100644
index 0000000..d549c46
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -0,0 +1,166 @@
+//=- HexagonInstrInfo.h - Hexagon Instruction Information ---------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonINSTRUCTIONINFO_H
+#define HexagonINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "HexagonRegisterInfo.h"
+
+
+#define GET_INSTRINFO_HEADER
+#include "HexagonGenInstrInfo.inc"
+
+namespace llvm {
+
+class HexagonInstrInfo : public HexagonGenInstrInfo {
+ const HexagonRegisterInfo RI;
+ const HexagonSubtarget& Subtarget;
+public:
+ explicit HexagonInstrInfo(HexagonSubtarget &ST);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const HexagonRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ return 0;
+ }
+
+ unsigned createVR(MachineFunction* MF, MVT VT) const;
+
+ virtual bool isPredicable(MachineInstr *MI) const;
+ virtual bool
+ PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
+ unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const;
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles, unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles, unsigned ExtraFCycles,
+ const BranchProbability &Probability) const;
+
+ virtual bool isPredicated(const MachineInstr *MI) const;
+ virtual bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+ virtual bool
+ SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+ virtual bool
+ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ virtual bool
+ isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumCycles,
+ const BranchProbability &Probability) const;
+
+ bool isValidOffset(const int Opcode, const int Offset) const;
+ bool isValidAutoIncImm(const EVT VT, const int Offset) const;
+ bool isMemOp(const MachineInstr *MI) const;
+ bool isSpillPredRegOp(const MachineInstr *MI) const;
+ bool isU6_3Immediate(const int value) const;
+ bool isU6_2Immediate(const int value) const;
+ bool isU6_1Immediate(const int value) const;
+ bool isU6_0Immediate(const int value) const;
+ bool isS4_3Immediate(const int value) const;
+ bool isS4_2Immediate(const int value) const;
+ bool isS4_1Immediate(const int value) const;
+ bool isS4_0Immediate(const int value) const;
+ bool isS12_Immediate(const int value) const;
+ bool isU6_Immediate(const int value) const;
+ bool isS8_Immediate(const int value) const;
+ bool isS6_Immediate(const int value) const;
+
+ bool isConditionalALU32 (const MachineInstr* MI) const;
+ bool isConditionalLoad (const MachineInstr* MI) const;
+ bool isDeallocRet(const MachineInstr *MI) const;
+
+private:
+ int getMatchingCondBranchOpcode(int Opc, bool sense) const;
+
+};
+
+}
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
new file mode 100644
index 0000000..cc508b7
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -0,0 +1,3014 @@
+//==- HexagonInstrInfo.td - Target Description for Hexagon -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrFormats.td"
+include "HexagonImmediates.td"
+
+//===----------------------------------------------------------------------===//
+// Hexagon Instruction Predicate Definitions.
+//===----------------------------------------------------------------------===//
+def HasV2T : Predicate<"Subtarget.hasV2TOps()">;
+def HasV2TOnly : Predicate<"Subtarget.hasV2TOpsOnly()">;
+def NoV2T : Predicate<"!Subtarget.hasV2TOps()">;
+def HasV3T : Predicate<"Subtarget.hasV3TOps()">;
+def HasV3TOnly : Predicate<"Subtarget.hasV3TOpsOnly()">;
+def NoV3T : Predicate<"!Subtarget.hasV3TOps()">;
+def HasV4T : Predicate<"Subtarget.hasV4TOps()">;
+def NoV4T : Predicate<"!Subtarget.hasV4TOps()">;
+def UseMEMOP : Predicate<"Subtarget.useMemOps()">;
+
+// Addressing modes.
+def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
+def ADDRriS11_0 : ComplexPattern<i32, 2, "SelectADDRriS11_0", [frameindex], []>;
+def ADDRriS11_1 : ComplexPattern<i32, 2, "SelectADDRriS11_1", [frameindex], []>;
+def ADDRriS11_2 : ComplexPattern<i32, 2, "SelectADDRriS11_2", [frameindex], []>;
+def ADDRriS11_3 : ComplexPattern<i32, 2, "SelectADDRriS11_3", [frameindex], []>;
+def ADDRriU6_0 : ComplexPattern<i32, 2, "SelectADDRriU6_0", [frameindex], []>;
+def ADDRriU6_1 : ComplexPattern<i32, 2, "SelectADDRriU6_1", [frameindex], []>;
+def ADDRriU6_2 : ComplexPattern<i32, 2, "SelectADDRriU6_2", [frameindex], []>;
+
+// Address operands.
+def MEMrr : Operand<i32> {
+ let PrintMethod = "printHexagonMEMrrOperand";
+ let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+
+// Address operands
+def MEMri : Operand<i32> {
+ let PrintMethod = "printHexagonMEMriOperand";
+ let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+
+def MEMri_s11_2 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectMEMriS11_2", []> {
+ let PrintMethod = "printHexagonMEMriOperand";
+ let MIOperandInfo = (ops IntRegs, s11Imm);
+}
+
+def FrameIndex : Operand<i32> {
+ let PrintMethod = "printHexagonFrameIndexOperand";
+ let MIOperandInfo = (ops IntRegs, s11Imm);
+}
+
+let PrintMethod = "printGlobalOperand" in
+ def globaladdress : Operand<i32>;
+
+let PrintMethod = "printJumpTable" in
+ def jumptablebase : Operand<i32>;
+
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+
+def bblabel : Operand<i32>;
+def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf , [], "BasicBlockSDNode">;
+
+def symbolHi32 : Operand<i32> {
+ let PrintMethod = "printSymbolHi";
+}
+def symbolLo32 : Operand<i32> {
+ let PrintMethod = "printSymbolLo";
+}
+
+// Multi-class for logical operators.
+multiclass ALU32_rr_ri<string OpcStr, SDNode OpNode> {
+ def rr : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ def ri : ALU32_ri<(outs IntRegs:$dst), (ins s10Imm:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "(#$b, $c)")),
+ [(set IntRegs:$dst, (OpNode s10Imm:$b, IntRegs:$c))]>;
+}
+
+// Multi-class for compare ops.
+let isCompare = 1 in {
+multiclass CMP64_rr<string OpcStr, PatFrag OpNode> {
+ def rr : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$b, DoubleRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set PredRegs:$dst, (OpNode DoubleRegs:$b, DoubleRegs:$c))]>;
+}
+multiclass CMP32_rr<string OpcStr, PatFrag OpNode> {
+ def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+}
+
+multiclass CMP32_rr_ri_s10<string OpcStr, PatFrag OpNode> {
+ def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s10Imm:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, s10ImmPred:$c))]>;
+}
+
+multiclass CMP32_rr_ri_u9<string OpcStr, PatFrag OpNode> {
+ def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Imm:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, u9ImmPred:$c))]>;
+}
+
+multiclass CMP32_ri_u9<string OpcStr, PatFrag OpNode> {
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Imm:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, u9ImmPred:$c))]>;
+}
+
+multiclass CMP32_ri_s8<string OpcStr, PatFrag OpNode> {
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s8Imm:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, s8ImmPred:$c))]>;
+}
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// http://qualnet.qualcomm.com/~erich/v1/htmldocs/index.html
+// http://qualnet.qualcomm.com/~erich/v2/htmldocs/index.html
+// http://qualnet.qualcomm.com/~erich/v3/htmldocs/index.html
+// http://qualnet.qualcomm.com/~erich/v4/htmldocs/index.html
+// http://qualnet.qualcomm.com/~erich/v5/htmldocs/index.html
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU32/ALU +
+//===----------------------------------------------------------------------===//
+// Add.
+let isPredicable = 1 in
+def ADD_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = add($src1, $src2)",
+ [(set IntRegs:$dst, (add IntRegs:$src1, IntRegs:$src2))]>;
+
+let isPredicable = 1 in
+def ADD_ri : ALU32_ri<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s16Imm:$src2),
+ "$dst = add($src1, #$src2)",
+ [(set IntRegs:$dst, (add IntRegs:$src1, s16ImmPred:$src2))]>;
+
+// Logical operations.
+let isPredicable = 1 in
+def XOR_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = xor($src1, $src2)",
+ [(set IntRegs:$dst, (xor IntRegs:$src1, IntRegs:$src2))]>;
+
+let isPredicable = 1 in
+def AND_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = and($src1, $src2)",
+ [(set IntRegs:$dst, (and IntRegs:$src1, IntRegs:$src2))]>;
+
+def OR_ri : ALU32_ri<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s8Imm:$src2),
+ "$dst = or($src1, #$src2)",
+ [(set IntRegs:$dst, (or IntRegs:$src1, s8ImmPred:$src2))]>;
+
+def NOT_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1),
+ "$dst = not($src1)",
+ [(set IntRegs:$dst, (not IntRegs:$src1))]>;
+
+def AND_ri : ALU32_ri<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s10Imm:$src2),
+ "$dst = and($src1, #$src2)",
+ [(set IntRegs:$dst, (and IntRegs:$src1, s10ImmPred:$src2))]>;
+
+let isPredicable = 1 in
+def OR_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = or($src1, $src2)",
+ [(set IntRegs:$dst, (or IntRegs:$src1, IntRegs:$src2))]>;
+
+// Negate.
+def NEG : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = neg($src1)",
+ [(set IntRegs:$dst, (ineg IntRegs:$src1))]>;
+// Nop.
+let neverHasSideEffects = 1 in
+def NOP : ALU32_rr<(outs), (ins),
+ "nop",
+ []>;
+
+// Subtract.
+let isPredicable = 1 in
+def SUB_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = sub($src1, $src2)",
+ [(set IntRegs:$dst, (sub IntRegs:$src1, IntRegs:$src2))]>;
+
+// Transfer immediate.
+let isReMaterializable = 1, isPredicable = 1 in
+def TFRI : ALU32_ri<(outs IntRegs:$dst), (ins s16Imm:$src1),
+ "$dst = #$src1",
+ [(set IntRegs:$dst, s16ImmPred:$src1)]>;
+
+// Transfer register.
+let neverHasSideEffects = 1, isPredicable = 1 in
+def TFR : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = $src1",
+ []>;
+
+// Transfer control register.
+let neverHasSideEffects = 1 in
+def TFCR : CRInst<(outs CRRegs:$dst), (ins IntRegs:$src1),
+ "$dst = $src1",
+ []>;
+//===----------------------------------------------------------------------===//
+// ALU32/ALU -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// ALU32/PERM +
+//===----------------------------------------------------------------------===//
+
+// Combine.
+let isPredicable = 1, neverHasSideEffects = 1 in
+def COMBINE_rr : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = combine($src1, $src2)",
+ []>;
+
+// Mux.
+def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
+ DoubleRegs:$src2,
+ DoubleRegs:$src3),
+ "$dst = vmux($src1, $src2, $src3)",
+ []>;
+
+def MUX_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ "$dst = mux($src1, $src2, $src3)",
+ [(set IntRegs:$dst, (select PredRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+def MUX_ir : ALU32_ir<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Imm:$src2,
+ IntRegs:$src3),
+ "$dst = mux($src1, #$src2, $src3)",
+ [(set IntRegs:$dst, (select PredRegs:$src1,
+ s8ImmPred:$src2, IntRegs:$src3))]>;
+
+def MUX_ri : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2,
+ s8Imm:$src3),
+ "$dst = mux($src1, $src2, #$src3)",
+ [(set IntRegs:$dst, (select PredRegs:$src1, IntRegs:$src2,
+ s8ImmPred:$src3))]>;
+
+def MUX_ii : ALU32_ii<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Imm:$src2,
+ s8Imm:$src3),
+ "$dst = mux($src1, #$src2, #$src3)",
+ [(set IntRegs:$dst, (select PredRegs:$src1, s8ImmPred:$src2,
+ s8ImmPred:$src3))]>;
+
+// Shift halfword.
+let isPredicable = 1 in
+def ASLH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = aslh($src1)",
+ [(set IntRegs:$dst, (shl IntRegs:$src1, (i32 16)))]>;
+
+let isPredicable = 1 in
+def ASRH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = asrh($src1)",
+ [(set IntRegs:$dst, (sra IntRegs:$src1, (i32 16)))]>;
+
+// Sign extend.
+let isPredicable = 1 in
+def SXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = sxtb($src1)",
+ [(set IntRegs:$dst, (sext_inreg IntRegs:$src1, i8))]>;
+
+let isPredicable = 1 in
+def SXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = sxth($src1)",
+ [(set IntRegs:$dst, (sext_inreg IntRegs:$src1, i16))]>;
+
+// Zero extend.
+let isPredicable = 1, neverHasSideEffects = 1 in
+def ZXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = zxtb($src1)",
+ []>;
+
+let isPredicable = 1, neverHasSideEffects = 1 in
+def ZXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = zxth($src1)",
+ []>;
+//===----------------------------------------------------------------------===//
+// ALU32/PERM -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// ALU32/PRED +
+//===----------------------------------------------------------------------===//
+
+// Conditional add.
+let neverHasSideEffects = 1 in
+def ADD_ri_cPt : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3),
+ "if ($src1) $dst = add($src2, #$src3)",
+ []>;
+
+let neverHasSideEffects = 1 in
+def ADD_ri_cNotPt : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3),
+ "if (!$src1) $dst = add($src2, #$src3)",
+ []>;
+
+let neverHasSideEffects = 1 in
+def ADD_ri_cdnPt : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3),
+ "if ($src1.new) $dst = add($src2, #$src3)",
+ []>;
+
+let neverHasSideEffects = 1 in
+def ADD_ri_cdnNotPt : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3),
+ "if (!$src1.new) $dst = add($src2, #$src3)",
+ []>;
+
+let neverHasSideEffects = 1 in
+def ADD_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst = add($src2, $src3)",
+ []>;
+
+let neverHasSideEffects = 1 in
+def ADD_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst = add($src2, $src3)",
+ []>;
+
+let neverHasSideEffects = 1 in
+def ADD_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst = add($src2, $src3)",
+ []>;
+
+let neverHasSideEffects = 1 in
+def ADD_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst = add($src2, $src3)",
+ []>;
+
+
+// Conditional combine.
+
+let neverHasSideEffects = 1 in
+def COMBINE_rr_cPt : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst = combine($src2, $src3)",
+ []>;
+
+let neverHasSideEffects = 1 in
+def COMBINE_rr_cNotPt : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst = combine($src2, $src3)",
+ []>;
+
+let neverHasSideEffects = 1 in
+def COMBINE_rr_cdnPt : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst = combine($src2, $src3)",
+ []>;
+
+let neverHasSideEffects = 1 in
+def COMBINE_rr_cdnNotPt : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst = combine($src2, $src3)",
+ []>;
+
+// Conditional logical operations.
+
+def XOR_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst = xor($src2, $src3)",
+ []>;
+
+def XOR_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst = xor($src2, $src3)",
+ []>;
+
+def XOR_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst = xor($src2, $src3)",
+ []>;
+
+def XOR_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst = xor($src2, $src3)",
+ []>;
+
+def AND_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst = and($src2, $src3)",
+ []>;
+
+def AND_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst = and($src2, $src3)",
+ []>;
+
+def AND_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst = and($src2, $src3)",
+ []>;
+
+def AND_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst = and($src2, $src3)",
+ []>;
+
+def OR_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst = or($src2, $src3)",
+ []>;
+
+def OR_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst = or($src2, $src3)",
+ []>;
+
+def OR_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst = or($src2, $src3)",
+ []>;
+
+def OR_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst = or($src2, $src3)",
+ []>;
+
+
+// Conditional subtract.
+
+def SUB_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst = sub($src2, $src3)",
+ []>;
+
+def SUB_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst = sub($src2, $src3)",
+ []>;
+
+def SUB_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst = sub($src2, $src3)",
+ []>;
+
+def SUB_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst = sub($src2, $src3)",
+ []>;
+
+
+// Conditional transfer.
+
+let neverHasSideEffects = 1 in
+def TFR_cPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = $src2",
+ []>;
+
+let neverHasSideEffects = 1 in
+def TFR_cNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ IntRegs:$src2),
+ "if (!$src1) $dst = $src2",
+ []>;
+
+let neverHasSideEffects = 1 in
+def TFRI_cPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, s12Imm:$src2),
+ "if ($src1) $dst = #$src2",
+ []>;
+
+let neverHasSideEffects = 1 in
+def TFRI_cNotPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ s12Imm:$src2),
+ "if (!$src1) $dst = #$src2",
+ []>;
+
+let neverHasSideEffects = 1 in
+def TFR_cdnPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ IntRegs:$src2),
+ "if ($src1.new) $dst = $src2",
+ []>;
+
+let neverHasSideEffects = 1 in
+def TFR_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ IntRegs:$src2),
+ "if (!$src1.new) $dst = $src2",
+ []>;
+
+let neverHasSideEffects = 1 in
+def TFRI_cdnPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ s12Imm:$src2),
+ "if ($src1.new) $dst = #$src2",
+ []>;
+
+let neverHasSideEffects = 1 in
+def TFRI_cdnNotPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ s12Imm:$src2),
+ "if (!$src1.new) $dst = #$src2",
+ []>;
+
+// Compare.
+defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", setugt>;
+defm CMPGT : CMP32_rr_ri_s10<"cmp.gt", setgt>;
+defm CMPLT : CMP32_rr<"cmp.lt", setlt>;
+defm CMPEQ : CMP32_rr_ri_s10<"cmp.eq", seteq>;
+defm CMPGE : CMP32_ri_s8<"cmp.ge", setge>;
+defm CMPGEU : CMP32_ri_u9<"cmp.geu", setuge>;
+//===----------------------------------------------------------------------===//
+// ALU32/PRED -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU32/VH +
+//===----------------------------------------------------------------------===//
+// Vector add halfwords
+
+// Vector average halfwords
+
+// Vector subtract halfwords
+//===----------------------------------------------------------------------===//
+// ALU32/VH -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// ALU64/ALU +
+//===----------------------------------------------------------------------===//
+// Add.
+def ADD64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = add($src1, $src2)",
+ [(set DoubleRegs:$dst, (add DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+// Add halfword.
+
+// Compare.
+defm CMPEHexagon4 : CMP64_rr<"cmp.eq", seteq>;
+defm CMPGT64 : CMP64_rr<"cmp.gt", setgt>;
+defm CMPGTU64 : CMP64_rr<"cmp.gtu", setugt>;
+
+// Logical operations.
+def AND_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = and($src1, $src2)",
+ [(set DoubleRegs:$dst, (and DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+def OR_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = or($src1, $src2)",
+ [(set DoubleRegs:$dst, (or DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+def XOR_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = xor($src1, $src2)",
+ [(set DoubleRegs:$dst, (xor DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+// Maximum.
+def MAXw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = max($src2, $src1)",
+ [(set IntRegs:$dst, (select (i1 (setlt IntRegs:$src2,
+ IntRegs:$src1)),
+ IntRegs:$src1, IntRegs:$src2))]>;
+
+// Minimum.
+def MINw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = min($src2, $src1)",
+ [(set IntRegs:$dst, (select (i1 (setgt IntRegs:$src2,
+ IntRegs:$src1)),
+ IntRegs:$src1, IntRegs:$src2))]>;
+
+// Subtract.
+def SUB64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = sub($src1, $src2)",
+ [(set DoubleRegs:$dst, (sub DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+// Subtract halfword.
+
+// Transfer register.
+let neverHasSideEffects = 1 in
+def TFR_64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
+ "$dst = $src1",
+ []>;
+//===----------------------------------------------------------------------===//
+// ALU64/ALU -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/BIT +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/BIT -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/PERM +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/PERM -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/VB +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/VB -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/VH +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/VH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/VW +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/VW -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// CR +
+//===----------------------------------------------------------------------===//
+// Logical reductions on predicates.
+
+// Looping instructions.
+
+// Pipelined looping instructions.
+
+// Logical operations on predicates.
+def AND_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2),
+ "$dst = and($src1, $src2)",
+ [(set PredRegs:$dst, (and PredRegs:$src1, PredRegs:$src2))]>;
+
+let neverHasSideEffects = 1 in
+def AND_pnotp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1,
+ PredRegs:$src2),
+ "$dst = and($src1, !$src2)",
+ []>;
+
+def NOT_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1),
+ "$dst = not($src1)",
+ [(set PredRegs:$dst, (not PredRegs:$src1))]>;
+
+def ANY_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1),
+ "$dst = any8($src1)",
+ []>;
+
+def ALL_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1),
+ "$dst = all8($src1)",
+ []>;
+
+def VITPACK_pp : SInst<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ PredRegs:$src2),
+ "$dst = vitpack($src1, $src2)",
+ []>;
+
+def VALIGN_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2,
+ PredRegs:$src3),
+ "$dst = valignb($src1, $src2, $src3)",
+ []>;
+
+def VSPLICE_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2,
+ PredRegs:$src3),
+ "$dst = vspliceb($src1, $src2, $src3)",
+ []>;
+
+def MASK_p : SInst<(outs DoubleRegs:$dst), (ins PredRegs:$src1),
+ "$dst = mask($src1)",
+ []>;
+
+def NOT_Ps : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1),
+ "$dst = not($src1)",
+ [(set PredRegs:$dst, (not PredRegs:$src1))]>;
+
+def OR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2),
+ "$dst = or($src1, $src2)",
+ [(set PredRegs:$dst, (or PredRegs:$src1, PredRegs:$src2))]>;
+
+def XOR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2),
+ "$dst = xor($src1, $src2)",
+ [(set PredRegs:$dst, (xor PredRegs:$src1, PredRegs:$src2))]>;
+
+
+// User control register transfer.
+//===----------------------------------------------------------------------===//
+// CR -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// J +
+//===----------------------------------------------------------------------===//
+// Jump to address.
+let isBranch = 1, isTerminator=1, isBarrier = 1, isPredicable = 1 in {
+ def JMP : JInst< (outs),
+ (ins brtarget:$offset),
+ "jump $offset",
+ [(br bb:$offset)]>;
+}
+
+// if (p0) jump
+let isBranch = 1, isTerminator=1, Defs = [PC] in {
+ def JMP_Pred : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if ($src) jump $offset",
+ [(brcond PredRegs:$src, bb:$offset)]>;
+}
+
+// if (!p0) jump
+let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in {
+ def JMP_PredNot : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if (!$src) jump $offset",
+ []>;
+}
+
+let isTerminator = 1, isBranch = 1, neverHasSideEffects = 1, Defs = [PC] in {
+ def BRCOND : JInst < (outs), (ins PredRegs:$pred, brtarget:$dst),
+ "if ($pred) jump $dst",
+ []>;
+}
+
+// Jump to address conditioned on new predicate.
+// if (p0) jump:t
+let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in {
+ def JMP_PredPt : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if ($src.new) jump:t $offset",
+ []>;
+}
+
+// if (!p0) jump:t
+let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in {
+ def JMP_PredNotPt : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if (!$src.new) jump:t $offset",
+ []>;
+}
+
+// Not taken.
+let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in {
+ def JMP_PredPnt : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if ($src.new) jump:nt $offset",
+ []>;
+}
+
+// Not taken.
+let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in {
+ def JMP_PredNotPnt : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if (!$src.new) jump:nt $offset",
+ []>;
+}
+//===----------------------------------------------------------------------===//
+// J -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// JR +
+//===----------------------------------------------------------------------===//
+def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+// Jump to address from register.
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR: JRInst<(outs), (ins),
+ "jumpr r31",
+ [(retflag)]>;
+}
+
+// Jump to address from register.
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cPt: JRInst<(outs), (ins PredRegs:$src1),
+ "if ($src1) jumpr r31",
+ []>;
+}
+
+// Jump to address from register.
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cNotPt: JRInst<(outs), (ins PredRegs:$src1),
+ "if (!$src1) jumpr r31",
+ []>;
+}
+
+//===----------------------------------------------------------------------===//
+// JR -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// LD +
+//===----------------------------------------------------------------------===//
+///
+/// Make sure that in a post-increment load, the first operand is always the
+/// post-increment operand.
+///
+// Load doubleword.
+let isPredicable = 1 in
+def LDrid : LDInst<(outs DoubleRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memd($addr)",
+ [(set DoubleRegs:$dst, (load ADDRriS11_3:$addr))]>;
+
+let isPredicable = 1, AddedComplexity = 20 in
+def LDrid_indexed : LDInst<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, s11_3Imm:$offset),
+ "$dst=memd($src1+#$offset)",
+ [(set DoubleRegs:$dst, (load (add IntRegs:$src1,
+ s11_3ImmPred:$offset)))]>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_GP : LDInst<(outs DoubleRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memd(#$global+$offset)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDd_GP : LDInst<(outs DoubleRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memd(#$global)",
+ []>;
+
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrid : LDInstPI<(outs DoubleRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memd($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load doubleword conditionally.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_cPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1) $dst = memd($addr)",
+ []>;
+
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_cNotPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1) $dst = memd($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_indexed_cPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
+ "if ($src1) $dst=memd($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_indexed_cNotPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
+ "if (!$src1) $dst=memd($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrid_cPt : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
+ "if ($src1) $dst1 = memd($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrid_cNotPt : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
+ "if (!$src1) $dst1 = memd($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_cdnPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1.new) $dst = memd($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_cdnNotPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1.new) $dst = memd($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_indexed_cdnPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
+ "if ($src1.new) $dst=memd($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_indexed_cdnNotPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
+ "if (!$src1.new) $dst=memd($src2+#$src3)",
+ []>;
+
+
+// Load byte.
+let isPredicable = 1 in
+def LDrib : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memb($addr)",
+ [(set IntRegs:$dst, (sextloadi8 ADDRriS11_0:$addr))]>;
+
+def LDrib_ae : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memb($addr)",
+ [(set IntRegs:$dst, (extloadi8 ADDRriS11_0:$addr))]>;
+
+// Indexed load byte.
+let isPredicable = 1, AddedComplexity = 20 in
+def LDrib_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_0Imm:$offset),
+ "$dst=memb($src1+#$offset)",
+ [(set IntRegs:$dst, (sextloadi8 (add IntRegs:$src1,
+ s11_0ImmPred:$offset)))]>;
+
+
+// Indexed load byte any-extend.
+let AddedComplexity = 20 in
+def LDrib_ae_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_0Imm:$offset),
+ "$dst=memb($src1+#$offset)",
+ [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1,
+ s11_0ImmPred:$offset)))]>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memb(#$global+$offset)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDb_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memb(#$global)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDub_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memub(#$global)",
+ []>;
+
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrib : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memb($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load byte conditionally.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1) $dst = memb($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1) $dst = memb($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_indexed_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if ($src1) $dst = memb($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if (!$src1) $dst = memb($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrib_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if ($src1) $dst1 = memb($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrib_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if (!$src1) $dst1 = memb($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1.new) $dst = memb($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1.new) $dst = memb($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if ($src1.new) $dst = memb($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if (!$src1.new) $dst = memb($src2+#$src3)",
+ []>;
+
+
+// Load halfword.
+let isPredicable = 1 in
+def LDrih : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memh($addr)",
+ [(set IntRegs:$dst, (sextloadi16 ADDRriS11_1:$addr))]>;
+
+let isPredicable = 1, AddedComplexity = 20 in
+def LDrih_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_1Imm:$offset),
+ "$dst=memh($src1+#$offset)",
+ [(set IntRegs:$dst, (sextloadi16 (add IntRegs:$src1,
+ s11_1ImmPred:$offset)))] >;
+
+def LDrih_ae : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memh($addr)",
+ [(set IntRegs:$dst, (extloadi16 ADDRriS11_1:$addr))]>;
+
+let AddedComplexity = 20 in
+def LDrih_ae_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_1Imm:$offset),
+ "$dst=memh($src1+#$offset)",
+ [(set IntRegs:$dst, (extloadi16 (add IntRegs:$src1,
+ s11_1ImmPred:$offset)))] >;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memh(#$global+$offset)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDh_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memh(#$global)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDuh_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memuh(#$global)",
+ []>;
+
+
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrih : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memh($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load halfword conditionally.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1) $dst = memh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1) $dst = memh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_indexed_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if ($src1) $dst = memh($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if (!$src1) $dst = memh($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrih_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if ($src1) $dst1 = memh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrih_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if (!$src1) $dst1 = memh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1.new) $dst = memh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1.new) $dst = memh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if ($src1.new) $dst = memh($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if (!$src1.new) $dst = memh($src2+#$src3)",
+ []>;
+
+// Load unsigned byte.
+let isPredicable = 1 in
+def LDriub : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memub($addr)",
+ [(set IntRegs:$dst, (zextloadi8 ADDRriS11_0:$addr))]>;
+
+let isPredicable = 1 in
+def LDriubit : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memub($addr)",
+ [(set IntRegs:$dst, (zextloadi1 ADDRriS11_0:$addr))]>;
+
+let isPredicable = 1, AddedComplexity = 20 in
+def LDriub_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_0Imm:$offset),
+ "$dst=memub($src1+#$offset)",
+ [(set IntRegs:$dst, (zextloadi8 (add IntRegs:$src1,
+ s11_0ImmPred:$offset)))]>;
+
+let AddedComplexity = 20 in
+def LDriubit_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_0Imm:$offset),
+ "$dst=memub($src1+#$offset)",
+ [(set IntRegs:$dst, (zextloadi1 (add IntRegs:$src1,
+ s11_0ImmPred:$offset)))]>;
+
+def LDriub_ae : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memub($addr)",
+ [(set IntRegs:$dst, (extloadi8 ADDRriS11_0:$addr))]>;
+
+
+let AddedComplexity = 20 in
+def LDriub_ae_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_0Imm:$offset),
+ "$dst=memub($src1+#$offset)",
+ [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1,
+ s11_0ImmPred:$offset)))]>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memub(#$global+$offset)",
+ []>;
+
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriub : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memub($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load unsigned byte conditionally.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1) $dst = memub($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1) $dst = memub($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_indexed_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if ($src1) $dst = memub($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if (!$src1) $dst = memub($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriub_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if ($src1) $dst1 = memub($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriub_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if (!$src1) $dst1 = memub($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1.new) $dst = memub($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1.new) $dst = memub($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if ($src1.new) $dst = memub($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if (!$src1.new) $dst = memub($src2+#$src3)",
+ []>;
+
+// Load unsigned halfword.
+let isPredicable = 1 in
+def LDriuh : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memuh($addr)",
+ [(set IntRegs:$dst, (zextloadi16 ADDRriS11_1:$addr))]>;
+
+// Indexed load unsigned halfword.
+let isPredicable = 1, AddedComplexity = 20 in
+def LDriuh_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_1Imm:$offset),
+ "$dst=memuh($src1+#$offset)",
+ [(set IntRegs:$dst, (zextloadi16 (add IntRegs:$src1,
+ s11_1ImmPred:$offset)))]>;
+
+def LDriuh_ae : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memuh($addr)",
+ [(set IntRegs:$dst, (extloadi16 ADDRriS11_1:$addr))]>;
+
+
+// Indexed load unsigned halfword any-extend.
+let AddedComplexity = 20 in
+def LDriuh_ae_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_1Imm:$offset),
+ "$dst=memuh($src1+#$offset)",
+ [(set IntRegs:$dst, (extloadi16 (add IntRegs:$src1,
+ s11_1ImmPred:$offset)))] >;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memuh(#$global+$offset)",
+ []>;
+
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriuh : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memuh($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load unsigned halfword conditionally.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1) $dst = memuh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1) $dst = memuh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_indexed_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if ($src1) $dst = memuh($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if (!$src1) $dst = memuh($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriuh_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if ($src1) $dst1 = memuh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriuh_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if (!$src1) $dst1 = memuh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1.new) $dst = memuh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1.new) $dst = memuh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if ($src1.new) $dst = memuh($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if (!$src1.new) $dst = memuh($src2+#$src3)",
+ []>;
+
+
+// Load word.
+let isPredicable = 1 in
+def LDriw : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr), "$dst = memw($addr)",
+ [(set IntRegs:$dst, (load ADDRriS11_2:$addr))]>;
+
+// Load predicate.
+let mayLoad = 1, Defs = [R10,R11] in
+def LDriw_pred : LDInst<(outs PredRegs:$dst),
+ (ins MEMri:$addr),
+ "Error; should not emit",
+ []>;
+
+// Indexed load.
+let isPredicable = 1, AddedComplexity = 20 in
+def LDriw_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_2Imm:$offset),
+ "$dst=memw($src1+#$offset)",
+ [(set IntRegs:$dst, (load (add IntRegs:$src1,
+ s11_2ImmPred:$offset)))]>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memw(#$global+$offset)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDw_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memw(#$global)",
+ []>;
+
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriw : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memw($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load word conditionally.
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1) $dst = memw($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1) $dst = memw($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_indexed_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
+ "if ($src1) $dst=memw($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
+ "if (!$src1) $dst=memw($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriw_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
+ "if ($src1) $dst1 = memw($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriw_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
+ "if (!$src1) $dst1 = memw($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1.new) $dst = memw($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1.new) $dst = memw($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
+ "if ($src1.new) $dst=memw($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
+ "if (!$src1.new) $dst=memw($src2+#$src3)",
+ []>;
+
+// Deallocate stack frame.
+let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in {
+ def DEALLOCFRAME : LDInst<(outs), (ins i32imm:$amt1),
+ "deallocframe",
+ []>;
+}
+
+// Load and unpack bytes to halfwords.
+//===----------------------------------------------------------------------===//
+// LD -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/ALU +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/ALU -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/COMPLEX +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/COMPLEX -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYH +
+//===----------------------------------------------------------------------===//
+// Multiply and use lower result.
+// Rd=+mpyi(Rs,#u8)
+def MPYI_riu : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2),
+ "$dst =+ mpyi($src1, #$src2)",
+ [(set IntRegs:$dst, (mul IntRegs:$src1, u8ImmPred:$src2))]>;
+
+// Rd=-mpyi(Rs,#u8)
+def MPYI_rin : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, n8Imm:$src2),
+ "$dst =- mpyi($src1, #$src2)",
+ [(set IntRegs:$dst,
+ (mul IntRegs:$src1, n8ImmPred:$src2))]>;
+
+// Rd=mpyi(Rs,#m9)
+// s9 is not the same as m9, but it works so far.
+// The assembler maps this to either Rd=+mpyi(Rs,#u8) or Rd=-mpyi(Rs,#u8),
+// depending on the value of m9. See the architecture spec.
+def MPYI_ri : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Imm:$src2),
+ "$dst = mpyi($src1, #$src2)",
+ [(set IntRegs:$dst, (mul IntRegs:$src1, s9ImmPred:$src2))]>;
+
+// Rd=mpyi(Rs,Rt)
+def MPYI : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpyi($src1, $src2)",
+ [(set IntRegs:$dst, (mul IntRegs:$src1, IntRegs:$src2))]>;
+
+// Rx+=mpyi(Rs,#u8)
+def MPYI_acc_ri : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3),
+ "$dst += mpyi($src2, #$src3)",
+ [(set IntRegs:$dst,
+ (add (mul IntRegs:$src2, u8ImmPred:$src3), IntRegs:$src1))],
+ "$src1 = $dst">;
+
+// Rx+=mpyi(Rs,Rt)
+def MPYI_acc_rr : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst += mpyi($src2, $src3)",
+ [(set IntRegs:$dst,
+ (add (mul IntRegs:$src2, IntRegs:$src3), IntRegs:$src1))],
+ "$src1 = $dst">;
+
+// Rx-=mpyi(Rs,#u8)
+def MPYI_sub_ri : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3),
+ "$dst -= mpyi($src2, #$src3)",
+ [(set IntRegs:$dst,
+ (sub IntRegs:$src1, (mul IntRegs:$src2, u8ImmPred:$src3)))],
+ "$src1 = $dst">;
+
+// Multiply and use upper result.
+// Rd=mpy(Rs,Rt.H):<<1:rnd:sat
+// Rd=mpy(Rs,Rt.L):<<1:rnd:sat
+// Rd=mpy(Rs,Rt)
+def MPY : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpy($src1, $src2)",
+ [(set IntRegs:$dst, (mulhs IntRegs:$src1, IntRegs:$src2))]>;
+
+// Rd=mpy(Rs,Rt):rnd
+// Rd=mpyu(Rs,Rt)
+def MPYU : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpyu($src1, $src2)",
+ [(set IntRegs:$dst, (mulhu IntRegs:$src1, IntRegs:$src2))]>;
+
+// Multiply and use full result.
+// Rdd=mpyu(Rs,Rt)
+def MPYU64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpyu($src1, $src2)",
+ [(set DoubleRegs:$dst, (mul (i64 (anyext IntRegs:$src1)),
+ (i64 (anyext IntRegs:$src2))))]>;
+
+// Rdd=mpy(Rs,Rt)
+def MPY64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpy($src1, $src2)",
+ [(set DoubleRegs:$dst, (mul (i64 (sext IntRegs:$src1)),
+ (i64 (sext IntRegs:$src2))))]>;
+
+
+// Multiply and accumulate, use full result.
+// Rxx[+-]=mpy(Rs,Rt)
+// Rxx+=mpy(Rs,Rt)
+def MPY64_acc : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst += mpy($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (add (mul (i64 (sext IntRegs:$src2)), (i64 (sext IntRegs:$src3))),
+ DoubleRegs:$src1))],
+ "$src1 = $dst">;
+
+// Rxx-=mpy(Rs,Rt)
+def MPY64_sub : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst -= mpy($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (sub DoubleRegs:$src1,
+ (mul (i64 (sext IntRegs:$src2)), (i64 (sext IntRegs:$src3)))))],
+ "$src1 = $dst">;
+
+// Rxx[+-]=mpyu(Rs,Rt)
+// Rxx+=mpyu(Rs,Rt)
+def MPYU64_acc : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ "$dst += mpyu($src2, $src3)",
+ [(set DoubleRegs:$dst, (add (mul (i64 (anyext IntRegs:$src2)),
+ (i64 (anyext IntRegs:$src3))),
+ DoubleRegs:$src1))],"$src1 = $dst">;
+
+// Rxx-=mpyu(Rs,Rt)
+def MPYU64_sub : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst += mpyu($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (sub DoubleRegs:$src1,
+ (mul (i64 (anyext IntRegs:$src2)),
+ (i64 (anyext IntRegs:$src3)))))],
+ "$src1 = $dst">;
+
+
+def ADDrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ "$dst += add($src2, $src3)",
+ [(set IntRegs:$dst, (add (add IntRegs:$src2, IntRegs:$src3),
+ IntRegs:$src1))],
+ "$src1 = $dst">;
+
+def ADDri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, s8Imm:$src3),
+ "$dst += add($src2, #$src3)",
+ [(set IntRegs:$dst, (add (add IntRegs:$src2, s8ImmPred:$src3),
+ IntRegs:$src1))],
+ "$src1 = $dst">;
+
+def SUBrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ "$dst -= add($src2, $src3)",
+ [(set IntRegs:$dst, (sub IntRegs:$src1, (add IntRegs:$src2,
+ IntRegs:$src3)))],
+ "$src1 = $dst">;
+
+def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, s8Imm:$src3),
+ "$dst -= add($src2, #$src3)",
+ [(set IntRegs:$dst, (sub IntRegs:$src1,
+ (add IntRegs:$src2, s8ImmPred:$src3)))],
+ "$src1 = $dst">;
+
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYS +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYS -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/VB +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/VB -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/VH +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/VH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ST +
+//===----------------------------------------------------------------------===//
+///
+/// Assumptions (do not ignore):
+/// 1. In a post-increment store, the zeroth operand is always the
+/// post-increment operand.
+/// 2. The store value operand (Rt/Rtt) in a store is always the last operand.
+///
+// Store doubleword.
+let isPredicable = 1 in
+def STrid : STInst<(outs),
+ (ins MEMri:$addr, DoubleRegs:$src1),
+ "memd($addr) = $src1",
+ [(store DoubleRegs:$src1, ADDRriS11_3:$addr)]>;
+
+// Indexed store double word.
+let AddedComplexity = 10, isPredicable = 1 in
+def STrid_indexed : STInst<(outs),
+ (ins IntRegs:$src1, s11_3Imm:$src2, DoubleRegs:$src3),
+ "memd($src1+#$src2) = $src3",
+ [(store DoubleRegs:$src3,
+ (add IntRegs:$src1, s11_3ImmPred:$src2))]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrid_GP : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, DoubleRegs:$src),
+ "memd(#$global+$offset) = $src",
+ []>;
+
+let hasCtrlDep = 1, isPredicable = 1 in
+def POST_STdri : STInstPI<(outs IntRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, s4Imm:$offset),
+ "memd($src2++#$offset) = $src1",
+ [(set IntRegs:$dst,
+ (post_store DoubleRegs:$src1, IntRegs:$src2, s4_3ImmPred:$offset))],
+ "$src2 = $dst">;
+
+// Store doubleword conditionally.
+// if ([!]Pv) memd(Rs+#u6:3)=Rtt
+// if (Pv) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_cPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
+ "if ($src1) memd($addr) = $src2",
+ []>;
+
+// if (!Pv) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
+ "if (!$src1) memd($addr) = $src2",
+ []>;
+
+// if (Pv) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_cPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
+ DoubleRegs:$src4),
+ "if ($src1) memd($src2+#$src3) = $src4",
+ []>;
+
+// if (!Pv) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
+ DoubleRegs:$src4),
+ "if (!$src1) memd($src2+#$src3) = $src4",
+ []>;
+
+// if ([!]Pv) memd(Rx++#s4:3)=Rtt
+// if (Pv) memd(Rx++#s4:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def POST_STdri_cPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
+ s4_3Imm:$offset),
+ "if ($src1) memd($src3++#$offset) = $src2",
+ [],
+ "$src3 = $dst">;
+
+// if (!Pv) memd(Rx++#s4:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def POST_STdri_cNotPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
+ s4_3Imm:$offset),
+ "if (!$src1) memd($src3++#$offset) = $src2",
+ [],
+ "$src3 = $dst">;
+
+
+// Store byte.
+// memb(Rs+#s11:0)=Rt
+let isPredicable = 1 in
+def STrib : STInst<(outs),
+ (ins MEMri:$addr, IntRegs:$src1),
+ "memb($addr) = $src1",
+ [(truncstorei8 IntRegs:$src1, ADDRriS11_0:$addr)]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def STrib_indexed : STInst<(outs),
+ (ins IntRegs:$src1, s11_0Imm:$src2, IntRegs:$src3),
+ "memb($src1+#$src2) = $src3",
+ [(truncstorei8 IntRegs:$src3, (add IntRegs:$src1,
+ s11_0ImmPred:$src2))]>;
+
+// memb(gp+#u16:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_GP : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memb(#$global+$offset) = $src",
+ []>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STb_GP : STInst<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memb(#$global) = $src",
+ []>;
+
+// memb(Rx++#s4:0)=Rt
+let hasCtrlDep = 1, isPredicable = 1 in
+def POST_STbri : STInstPI<(outs IntRegs:$dst), (ins IntRegs:$src1,
+ IntRegs:$src2,
+ s4Imm:$offset),
+ "memb($src2++#$offset) = $src1",
+ [(set IntRegs:$dst,
+ (post_truncsti8 IntRegs:$src1, IntRegs:$src2,
+ s4_0ImmPred:$offset))],
+ "$src2 = $dst">;
+
+// Store byte conditionally.
+// if ([!]Pv) memb(Rs+#u6:0)=Rt
+// if (Pv) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memb($addr) = $src2",
+ []>;
+
+// if (!Pv) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memb($addr) = $src2",
+ []>;
+
+// if (Pv) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if ($src1) memb($src2+#$src3) = $src4",
+ []>;
+
+// if (!Pv) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memb($src2+#$src3) = $src4",
+ []>;
+
+// if ([!]Pv) memb(Rx++#s4:0)=Rt
+// if (Pv) memb(Rx++#s4:0)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STbri_cPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if ($src1) memb($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+// if (!Pv) memb(Rx++#s4:0)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STbri_cNotPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if (!$src1) memb($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+
+// Store halfword.
+// memh(Rs+#s11:1)=Rt
+let isPredicable = 1 in
+def STrih : STInst<(outs),
+ (ins MEMri:$addr, IntRegs:$src1),
+ "memh($addr) = $src1",
+ [(truncstorei16 IntRegs:$src1, ADDRriS11_1:$addr)]>;
+
+
+let AddedComplexity = 10, isPredicable = 1 in
+def STrih_indexed : STInst<(outs),
+ (ins IntRegs:$src1, s11_1Imm:$src2, IntRegs:$src3),
+ "memh($src1+#$src2) = $src3",
+ [(truncstorei16 IntRegs:$src3, (add IntRegs:$src1,
+ s11_1ImmPred:$src2))]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_GP : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memh(#$global+$offset) = $src",
+ []>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STh_GP : STInst<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memh(#$global) = $src",
+ []>;
+
+// memh(Rx++#s4:1)=Rt.H
+// memh(Rx++#s4:1)=Rt
+let hasCtrlDep = 1, isPredicable = 1 in
+def POST_SThri : STInstPI<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset),
+ "memh($src2++#$offset) = $src1",
+ [(set IntRegs:$dst,
+ (post_truncsti16 IntRegs:$src1, IntRegs:$src2,
+ s4_1ImmPred:$offset))],
+ "$src2 = $dst">;
+
+// Store halfword conditionally.
+// if ([!]Pv) memh(Rs+#u6:1)=Rt
+// if (Pv) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memh($addr) = $src2",
+ []>;
+
+// if (!Pv) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memh($addr) = $src2",
+ []>;
+
+// if (Pv) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if ($src1) memh($src2+#$src3) = $src4",
+ []>;
+
+// if (!Pv) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memh($src2+#$src3) = $src4",
+ []>;
+
+// if ([!]Pv) memh(Rx++#s4:1)=Rt
+// if (Pv) memh(Rx++#s4:1)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if ($src1) memh($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+// if (!Pv) memh(Rx++#s4:1)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cNotPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if (!$src1) memh($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+
+// Store word.
+// Store predicate.
+let Defs = [R10,R11] in
+def STriw_pred : STInst<(outs),
+ (ins MEMri:$addr, PredRegs:$src1),
+ "Error; should not emit",
+ []>;
+
+// memw(Rs+#s11:2)=Rt
+let isPredicable = 1 in
+def STriw : STInst<(outs),
+ (ins MEMri:$addr, IntRegs:$src1),
+ "memw($addr) = $src1",
+ [(store IntRegs:$src1, ADDRriS11_2:$addr)]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def STriw_indexed : STInst<(outs),
+ (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3),
+ "memw($src1+#$src2) = $src3",
+ [(store IntRegs:$src3, (add IntRegs:$src1, s11_2ImmPred:$src2))]>;
+
+def STriwt : STInst<(outs),
+ (ins MEMri:$addr, DoubleRegs:$src1),
+ "memw($addr) = $src1",
+ [(truncstorei32 DoubleRegs:$src1, ADDRriS11_2:$addr)]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_GP : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memw(#$global+$offset) = $src",
+ []>;
+
+let hasCtrlDep = 1, isPredicable = 1 in
+def POST_STwri : STInstPI<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset),
+ "memw($src2++#$offset) = $src1",
+ [(set IntRegs:$dst,
+ (post_store IntRegs:$src1, IntRegs:$src2, s4_2ImmPred:$offset))],
+ "$src2 = $dst">;
+
+// Store word conditionally.
+// if ([!]Pv) memw(Rs+#u6:2)=Rt
+// if (Pv) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memw($addr) = $src2",
+ []>;
+
+// if (!Pv) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memw($addr) = $src2",
+ []>;
+
+// if (Pv) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if ($src1) memw($src2+#$src3) = $src4",
+ []>;
+
+// if (!Pv) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memw($src2+#$src3) = $src4",
+ []>;
+
+// if ([!]Pv) memw(Rx++#s4:2)=Rt
+// if (Pv) memw(Rx++#s4:2)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STwri_cPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if ($src1) memw($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+// if (!Pv) memw(Rx++#s4:2)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STwri_cNotPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if (!$src1) memw($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+
+
+// Allocate stack frame.
+let Defs = [R29, R30], Uses = [R31, R30], neverHasSideEffects = 1 in {
+ def ALLOCFRAME : STInst<(outs),
+ (ins i32imm:$amt),
+ "allocframe(#$amt)",
+ []>;
+}
+//===----------------------------------------------------------------------===//
+// ST -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/ALU +
+//===----------------------------------------------------------------------===//
+// Logical NOT.
+def NOT_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
+ "$dst = not($src1)",
+ [(set DoubleRegs:$dst, (not DoubleRegs:$src1))]>;
+
+
+// Sign extend word to doubleword.
+def SXTW : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
+ "$dst = sxtw($src1)",
+ [(set DoubleRegs:$dst, (sext IntRegs:$src1))]>;
+//===----------------------------------------------------------------------===//
+// STYPE/ALU -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/BIT +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/BIT -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// STYPE/COMPLEX +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/COMPLEX -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/PERM +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/PERM -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/PRED +
+//===----------------------------------------------------------------------===//
+// Predicate transfer.
+let neverHasSideEffects = 1 in
+def TFR_RsPd : SInst<(outs IntRegs:$dst), (ins PredRegs:$src1),
+ "$dst = $src1 // Should almost never emit this",
+ []>;
+
+def TFR_PdRs : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1),
+ "$dst = $src1 // Should almost never emit!",
+ [(set PredRegs:$dst, (trunc IntRegs:$src1))]>;
+//===----------------------------------------------------------------------===//
+// STYPE/PRED -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/SHIFT +
+//===----------------------------------------------------------------------===//
+// Shift by immediate.
+def ASR_ri : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = asr($src1, #$src2)",
+ [(set IntRegs:$dst, (sra IntRegs:$src1, u5ImmPred:$src2))]>;
+
+def ASRd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+ "$dst = asr($src1, #$src2)",
+ [(set DoubleRegs:$dst, (sra DoubleRegs:$src1, u6ImmPred:$src2))]>;
+
+def ASL : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = asl($src1, #$src2)",
+ [(set IntRegs:$dst, (shl IntRegs:$src1, u5ImmPred:$src2))]>;
+
+def LSR_ri : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = lsr($src1, #$src2)",
+ [(set IntRegs:$dst, (srl IntRegs:$src1, u5ImmPred:$src2))]>;
+
+def LSRd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+ "$dst = lsr($src1, #$src2)",
+ [(set DoubleRegs:$dst, (srl DoubleRegs:$src1, u6ImmPred:$src2))]>;
+
+def LSRd_ri_acc : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2,
+ u6Imm:$src3),
+ "$dst += lsr($src2, #$src3)",
+ [(set DoubleRegs:$dst, (add DoubleRegs:$src1,
+ (srl DoubleRegs:$src2,
+ u6ImmPred:$src3)))],
+ "$src1 = $dst">;
+
+// Shift by immediate and accumulate.
+def ASR_rr_acc : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1,
+ IntRegs:$src2,
+ IntRegs:$src3),
+ "$dst += asr($src2, $src3)",
+ [], "$src1 = $dst">;
+
+// Shift by immediate and add.
+def ADDASL : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ u3Imm:$src3),
+ "$dst = addasl($src1, $src2, #$src3)",
+ [(set IntRegs:$dst, (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u3ImmPred:$src3)))]>;
+
+// Shift by register.
+def ASL_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = asl($src1, $src2)",
+ [(set IntRegs:$dst, (shl IntRegs:$src1, IntRegs:$src2))]>;
+
+def ASR_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = asr($src1, $src2)",
+ [(set IntRegs:$dst, (sra IntRegs:$src1, IntRegs:$src2))]>;
+
+
+def LSR_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = lsr($src1, $src2)",
+ [(set IntRegs:$dst, (srl IntRegs:$src1, IntRegs:$src2))]>;
+
+def LSLd : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ "$dst = lsl($src1, $src2)",
+ [(set DoubleRegs:$dst, (shl DoubleRegs:$src1, IntRegs:$src2))]>;
+
+def ASRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ IntRegs:$src2),
+ "$dst = asr($src1, $src2)",
+ [(set DoubleRegs:$dst, (sra DoubleRegs:$src1, IntRegs:$src2))]>;
+
+def LSRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ IntRegs:$src2),
+ "$dst = lsr($src1, $src2)",
+ [(set DoubleRegs:$dst, (srl DoubleRegs:$src1, IntRegs:$src2))]>;
+
+//===----------------------------------------------------------------------===//
+// STYPE/SHIFT -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/VH +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/VH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/VW +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/VW -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SYSTEM/SUPER +
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SYSTEM/USER +
+//===----------------------------------------------------------------------===//
+def SDHexagonBARRIER: SDTypeProfile<0, 0, []>;
+def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDHexagonBARRIER,
+ [SDNPHasChain]>;
+
+let hasSideEffects = 1 in
+def BARRIER : STInst<(outs), (ins),
+ "barrier",
+ [(HexagonBARRIER)]>;
+
+//===----------------------------------------------------------------------===//
+// SYSTEM/SUPER -
+//===----------------------------------------------------------------------===//
+
+// TFRI64 - assembly mapped.
+let isReMaterializable = 1 in
+def TFRI64 : ALU64_rr<(outs DoubleRegs:$dst), (ins s8Imm64:$src1),
+ "$dst = #$src1",
+ [(set DoubleRegs:$dst, s8Imm64Pred:$src1)]>;
+
+// Pseudo instruction to encode a set of conditional transfers.
+// This instruction is used instead of a mux and trades off code size
+// for performance. We apply this transformation optimistically, in
+// the hope that these instructions get promoted to dot-new transfers.
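+// An illustrative sketch of the intended expansion (not the authoritative
+// lowering, which happens later when the pseudo is expanded):
+//   rd = select(p0, rs, rt)   ==>   if (p0) rd = rs
+//                                   if (!p0) rd = rt
+// Either conditional transfer may later be promoted to its dot-new form
+// (e.g. "if (p0.new) rd = rs") when the predicate definition ends up in the
+// same packet.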
+let AddedComplexity = 100 in
+def TFR_condset_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ IntRegs:$src2,
+ IntRegs:$src3),
+ "Error; should not emit",
+ [(set IntRegs:$dst, (select PredRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+let AddedComplexity = 100 in
+def TFR_condset_ii : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, s12Imm:$src2, s12Imm:$src3),
+ "Error; should not emit",
+ [(set IntRegs:$dst, (select PredRegs:$src1,
+ s12ImmPred:$src2,
+ s12ImmPred:$src3))]>;
+
+// Generate frameindex addresses.
+let isReMaterializable = 1 in
+def TFR_FI : ALU32_ri<(outs IntRegs:$dst), (ins FrameIndex:$src1),
+ "$dst = add($src1)",
+ [(set IntRegs:$dst, ADDRri:$src1)]>;
+
+//
+// CR - Type.
+//
+let neverHasSideEffects = 1, Defs = [SA0, LC0] in {
+def LOOP0_i : CRInst<(outs), (ins brtarget:$offset, u10Imm:$src2),
+ "loop0($offset, #$src2)",
+ []>;
+}
+
+let neverHasSideEffects = 1, Defs = [SA0, LC0] in {
+def LOOP0_r : CRInst<(outs), (ins brtarget:$offset, IntRegs:$src2),
+ "loop0($offset, $src2)",
+ []>;
+}
+
+let isBranch = 1, isTerminator = 1, neverHasSideEffects = 1,
+ Defs = [PC, LC0], Uses = [SA0, LC0] in {
+def ENDLOOP0 : CRInst<(outs), (ins brtarget:$offset),
+ ":endloop0",
+ []>;
+}
+
+// Support for generating global address.
+// Taken from X86InstrInfo.td.
+def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<0>]>;
+def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>;
+def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>;
+
+// This pattern is incorrect. When we add small data, we should change
+// this pattern to use memw(#foo).
+let isMoveImm = 1 in
+def CONST32 : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst = CONST32(#$global)",
+ [(set IntRegs:$dst,
+ (load (HexagonCONST32 tglobaltlsaddr:$global)))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_set : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst = CONST32(#$global)",
+ [(set IntRegs:$dst,
+ (HexagonCONST32 tglobaladdr:$global))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_set_jt : LDInst<(outs IntRegs:$dst), (ins jumptablebase:$jt),
+ "$dst = CONST32(#$jt)",
+ [(set IntRegs:$dst,
+ (HexagonCONST32 tjumptable:$jt))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32GP_set : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst = CONST32(#$global)",
+ [(set IntRegs:$dst,
+ (HexagonCONST32_GP tglobaladdr:$global))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_Int_Real : LDInst<(outs IntRegs:$dst), (ins i32imm:$global),
+ "$dst = CONST32(#$global)",
+ [(set IntRegs:$dst, imm:$global) ]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_Label : LDInst<(outs IntRegs:$dst), (ins bblabel:$label),
+ "$dst = CONST32($label)",
+ [(set IntRegs:$dst, (HexagonCONST32 bbl:$label))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST64_Int_Real : LDInst<(outs DoubleRegs:$dst), (ins i64imm:$global),
+ "$dst = CONST64(#$global)",
+ [(set DoubleRegs:$dst, imm:$global) ]>;
+
+def TFR_PdFalse : SInst<(outs PredRegs:$dst), (ins),
+ "$dst = xor($dst, $dst)",
+ [(set PredRegs:$dst, 0)]>;
+
+def MPY_trsext : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpy($src1, $src2)",
+ [(set IntRegs:$dst,
+ (trunc (i64 (srl (i64 (mul (i64 (sext IntRegs:$src1)),
+ (i64 (sext IntRegs:$src2)))),
+ (i32 32)))))]>;
+
+// Pseudo instructions.
+def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+
+def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+
+def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+
+def call : SDNode<"HexagonISD::CALL", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
+
+// For tail calls, a HexagonTCRet SDNode has three SDNode properties: a chain,
+// an optional incoming glue, and variable arguments.
+// Its single operand has pointer type.
+def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+let Defs = [R29, R30], Uses = [R31, R30, R29] in {
+ def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
+ "Should never be emitted",
+ [(callseq_start timm:$amt)]>;
+}
+
+let Defs = [R29, R30, R31], Uses = [R29] in {
+ def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "Should never be emitted",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+// Call subroutine.
+let isCall = 1, neverHasSideEffects = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def CALL : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "call $dst", []>;
+}
+
+// Call subroutine from register.
+let isCall = 1, neverHasSideEffects = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def CALLR : JRInst<(outs), (ins IntRegs:$dst, variable_ops),
+ "callr $dst",
+ []>;
+ }
+
+// Tail Calls.
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def TCRETURNtg : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "jump $dst // TAILCALL", []>;
+}
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def TCRETURNtext : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "jump $dst // TAILCALL", []>;
+}
+
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def TCRETURNR : JInst<(outs), (ins IntRegs:$dst, variable_ops),
+ "jumpr $dst // TAILCALL", []>;
+}
+// Map call instruction.
+def : Pat<(call IntRegs:$dst),
+ (CALLR IntRegs:$dst)>, Requires<[HasV2TOnly]>;
+def : Pat<(call tglobaladdr:$dst),
+ (CALL tglobaladdr:$dst)>, Requires<[HasV2TOnly]>;
+def : Pat<(call texternalsym:$dst),
+ (CALL texternalsym:$dst)>, Requires<[HasV2TOnly]>;
+// Tail calls.
+def : Pat<(HexagonTCRet tglobaladdr:$dst),
+ (TCRETURNtg tglobaladdr:$dst)>;
+def : Pat<(HexagonTCRet texternalsym:$dst),
+ (TCRETURNtext texternalsym:$dst)>;
+def : Pat<(HexagonTCRet IntRegs:$dst),
+ (TCRETURNR IntRegs:$dst)>;
+
+// Map from r0 = and(r1, 65535) to r0 = zxth(r1).
+def : Pat <(and IntRegs:$src1, 65535),
+ (ZXTH IntRegs:$src1)>;
+
+// Map from r0 = and(r1, 255) to r0 = zxtb(r1).
+def : Pat <(and IntRegs:$src1, 255),
+ (ZXTB IntRegs:$src1)>;
+
+// Map Add(p1, true) to p1 = not(p1).
+// Add(p1, false) should never be produced;
+// if it is, it has to be mapped to a NOP.
+def : Pat <(add PredRegs:$src1, -1),
+ (NOT_pp PredRegs:$src1)>;
+
+// Map from p0 = setlt(r0, r1) r2 = mux(p0, r3, r4) =>
+// p0 = cmp.lt(r0, r1), r0 = mux(p0, r2, r1).
+def : Pat <(select (i1 (setlt IntRegs:$src1, IntRegs:$src2)), IntRegs:$src3,
+ IntRegs:$src4),
+ (TFR_condset_rr (CMPLTrr IntRegs:$src1, IntRegs:$src2), IntRegs:$src4,
+ IntRegs:$src3)>, Requires<[HasV2TOnly]>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
+def : Pat <(select (not PredRegs:$src1), s8ImmPred:$src2, s8ImmPred:$src3),
+ (TFR_condset_ii PredRegs:$src1, s8ImmPred:$src3, s8ImmPred:$src2)>;
+
+// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump.
+def : Pat <(brcond (not PredRegs:$src1), bb:$offset),
+ (JMP_PredNot PredRegs:$src1, bb:$offset)>;
+
+// Map from p2 = pnot(p2); p1 = and(p0, p2) => p1 = and(p0, !p2).
+def : Pat <(and PredRegs:$src1, (not PredRegs:$src2)),
+ (AND_pnotp PredRegs:$src1, PredRegs:$src2)>;
+
+// Map from store(globaladdress + x) -> memd(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(store DoubleRegs:$src1,
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STrid_GP tglobaladdr:$global, u16ImmPred:$offset, DoubleRegs:$src1)>;
+
+// Map from store(globaladdress) -> memd(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(store DoubleRegs:$src1, (HexagonCONST32_GP tglobaladdr:$global)),
+ (STrid_GP tglobaladdr:$global, 0, DoubleRegs:$src1)>;
+
+// Map from store(globaladdress + x) -> memw(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(store IntRegs:$src1, (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STriw_GP tglobaladdr:$global, u16ImmPred:$offset, IntRegs:$src1)>;
+
+// Map from store(globaladdress) -> memw(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(store IntRegs:$src1, (HexagonCONST32_GP tglobaladdr:$global)),
+ (STriw_GP tglobaladdr:$global, 0, IntRegs:$src1)>;
+
+// Map from store(globaladdress + x) -> memh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei16 IntRegs:$src1,
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STrih_GP tglobaladdr:$global, u16ImmPred:$offset, IntRegs:$src1)>;
+
+// Map from store(globaladdress) -> memh(#foo).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei16 IntRegs:$src1,
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STh_GP tglobaladdr:$global, IntRegs:$src1)>;
+
+// Map from store(globaladdress + x) -> memb(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei8 IntRegs:$src1,
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STrib_GP tglobaladdr:$global, u16ImmPred:$offset, IntRegs:$src1)>;
+
+// Map from store(globaladdress) -> memb(#foo).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei8 IntRegs:$src1,
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STb_GP tglobaladdr:$global, IntRegs:$src1)>;
+
+// Map from load(globaladdress + x) -> memw(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(load (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDriw_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memw(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(load (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDw_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress + x) -> memd(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(i64 (load (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (LDrid_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memw(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (LDd_GP tglobaladdr:$global)>;
+
+
+// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress + 0), Pd = Rd.
+let AddedComplexity = 100 in
+def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (TFR_PdRs (LDrib_GP tglobaladdr:$global, 0))>;
+
+// Map from load(globaladdress + x) -> memh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDrih_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memh(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDrih_GP tglobaladdr:$global, 0)>;
+
+// Map from load(globaladdress + x) -> memuh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memuh(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDriuh_GP tglobaladdr:$global, 0)>;
+
+// Map from load(globaladdress + x) -> memuh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(extloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memuh(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(extloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDriuh_GP tglobaladdr:$global, 0)>;
+// Map from load(globaladdress + x) -> memub(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDriub_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memuh(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDriub_GP tglobaladdr:$global, 0)>;
+
+// Map from load(globaladdress + x) -> memb(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDrib_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memb(#foo).
+let AddedComplexity = 100 in
+def : Pat <(extloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDb_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memb(#foo).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDb_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memub(#foo).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDub_GP tglobaladdr:$global)>;
+
+// When the Interprocedural Global Variable optimizer realizes that a
+// certain global variable takes only two constant values, it shrinks the
+// global to a boolean. Catch such loads with the following three patterns.
+let AddedComplexity = 100 in
+def : Pat <(extloadi1 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDb_GP tglobaladdr:$global)>;
+
+let AddedComplexity = 100 in
+def : Pat <(sextloadi1 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDb_GP tglobaladdr:$global)>;
+
+let AddedComplexity = 100 in
+def : Pat <(zextloadi1 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDub_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memh(#foo).
+let AddedComplexity = 100 in
+def : Pat <(extloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDh_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memh(#foo).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDh_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memuh(#foo).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDuh_GP tglobaladdr:$global)>;
+
+// Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned.
+def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)),
+ (AND_rr (LDrib ADDRriS11_0:$addr), (TFRI 0x1))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = SXTW(Rss.lo).
+def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i32)),
+ (i64 (SXTW (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg)))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = SXTW(SXTH(Rss.lo)).
+def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i16)),
+ (i64 (SXTW (SXTH (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg))))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = SXTW(SXTB(Rss.lo)).
+def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i8)),
+ (i64 (SXTW (SXTB (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg))))>;
+
+// We want to avoid emitting pnots as much as possible.
+// Map brcond with an unsupported setcc to a JMP_PredNot.
+def : Pat <(brcond (i1 (setne IntRegs:$src1, IntRegs:$src2)), bb:$offset),
+ (JMP_PredNot (CMPEQrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setne IntRegs:$src1, s10ImmPred:$src2)), bb:$offset),
+ (JMP_PredNot (CMPEQri IntRegs:$src1, s10ImmPred:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setne PredRegs:$src1, (i1 -1))), bb:$offset),
+ (JMP_PredNot PredRegs:$src1, bb:$offset)>;
+
+def : Pat <(brcond (i1 (setne PredRegs:$src1, (i1 0))), bb:$offset),
+ (JMP_Pred PredRegs:$src1, bb:$offset)>;
+
+def : Pat <(brcond (i1 (setlt IntRegs:$src1, s8ImmPred:$src2)), bb:$offset),
+ (JMP_PredNot (CMPGEri IntRegs:$src1, s8ImmPred:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setlt IntRegs:$src1, IntRegs:$src2)), bb:$offset),
+ (JMP_Pred (CMPLTrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setuge DoubleRegs:$src1, DoubleRegs:$src2)),
+ bb:$offset),
+ (JMP_PredNot (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1),
+ bb:$offset)>;
+
+def : Pat <(brcond (i1 (setule IntRegs:$src1, IntRegs:$src2)), bb:$offset),
+ (JMP_PredNot (CMPGTUrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setule DoubleRegs:$src1, DoubleRegs:$src2)),
+ bb:$offset),
+ (JMP_PredNot (CMPGTU64rr DoubleRegs:$src1, DoubleRegs:$src2),
+ bb:$offset)>;
+
+// Map from a 64-bit select to an emulated 64-bit mux.
+// Hexagon does not support 64-bit MUXes, so emulate with combines.
+def : Pat <(select PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (COMBINE_rr
+ (MUX_rr PredRegs:$src1,
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src3, subreg_hireg)),
+ (MUX_rr PredRegs:$src1,
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src3, subreg_loreg)))>;
+
+// Map from a 1-bit select to logical ops.
+// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3).
+def : Pat <(select PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (OR_pp (AND_pp PredRegs:$src1, PredRegs:$src2),
+ (AND_pp (NOT_pp PredRegs:$src1), PredRegs:$src3))>;
+
+// Map Pd = load(addr) -> Rs = load(addr); Pd = Rs.
+def : Pat<(i1 (load ADDRriS11_2:$addr)),
+ (i1 (TFR_PdRs (i32 (LDrib ADDRriS11_2:$addr))))>;
+
+// Map for truncating from i64 values to i32 values.
+def : Pat<(i32 (trunc DoubleRegs:$src)),
+ (i32 (EXTRACT_SUBREG DoubleRegs:$src, subreg_loreg))>;
+
+// Map for truncating from i64 values to i1 values.
+def : Pat<(i1 (trunc DoubleRegs:$src)),
+ (i1 (TFR_PdRs (i32(EXTRACT_SUBREG DoubleRegs:$src, subreg_loreg))))>;
+
+// Map memb(Rs) = Rdd -> memb(Rs) = Rt.
+def : Pat<(truncstorei8 DoubleRegs:$src, ADDRriS11_0:$addr),
+ (STrib ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG DoubleRegs:$src,
+ subreg_loreg)))>;
+
+// Map memh(Rs) = Rdd -> memh(Rs) = Rt.
+def : Pat<(truncstorei16 DoubleRegs:$src, ADDRriS11_0:$addr),
+ (STrih ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG DoubleRegs:$src,
+ subreg_loreg)))>;
+
+// Map from i1 = constant<-1>; memw(addr) = i1 -> r0 = 1; memw(addr) = r0.
+def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
+ (STrib ADDRriS11_2:$addr, (TFRI 1))>;
+
+let AddedComplexity = 100 in
+// Map from i1 = constant<-1>; memw(CONST32(#foo)) = i1 -> r0 = 1;
+// memw(#foo) = r0
+def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (STb_GP tglobaladdr:$global, (TFRI 1))>;
+
+
+// Map from i1 = constant<-1>; store i1 -> r0 = 1; store r0.
+def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
+ (STrib ADDRriS11_2:$addr, (TFRI 1))>;
+
+// Map from memb(Rs) = Pd -> Rt = mux(Pd, #1, #0); store Rt.
+def : Pat<(store PredRegs:$src1, ADDRriS11_2:$addr),
+ (STrib ADDRriS11_2:$addr, (i32 (MUX_ii PredRegs:$src1, 1, 0)) )>;
+
+// Map Rdd = anyext(Rs) -> Rdd = sxtw(Rs).
+// Hexagon_TODO: We can probably use combine but that will cost 2 instructions.
+// Better way to do this?
+def : Pat<(i64 (anyext IntRegs:$src1)),
+ (i64 (SXTW IntRegs:$src1))>;
+
+// Map cmple -> cmpgt.
+// rs <= rt -> !(rs > rt).
+def : Pat<(i1 (setle IntRegs:$src1, s10ImmPred:$src2)),
+ (i1 (NOT_Ps (CMPGTri IntRegs:$src1, s10ImmPred:$src2)))>;
+
+// rs <= rt -> !(rs > rt).
+def : Pat<(i1 (setle IntRegs:$src1, IntRegs:$src2)),
+ (i1 (NOT_Ps (CMPGTrr IntRegs:$src1, IntRegs:$src2)))>;
+
+// Rss <= Rtt -> !(Rss > Rtt).
+def : Pat<(i1 (setle DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (NOT_Ps (CMPGT64rr DoubleRegs:$src1, DoubleRegs:$src2)))>;
+
+// Map cmpne -> cmpeq.
+// Hexagon_TODO: We should improve on this.
+// rs != rt -> !(rs == rt).
+def : Pat <(i1 (setne IntRegs:$src1, s10ImmPred:$src2)),
+ (i1 (NOT_Ps(i1 (CMPEQri IntRegs:$src1, s10ImmPred:$src2))))>;
+
+// Map cmpne(Rs) -> !cmpeq(Rs).
+// rs != rt -> !(rs == rt).
+def : Pat <(i1 (setne IntRegs:$src1, IntRegs:$src2)),
+ (i1 (NOT_Ps(i1 (CMPEQrr IntRegs:$src1, IntRegs:$src2))))>;
+
+// Convert setne back to xor for Hexagon, since we compute with predicate registers.
+def : Pat <(i1 (setne PredRegs:$src1, PredRegs:$src2)),
+ (i1 (XOR_pp PredRegs:$src1, PredRegs:$src2))>;
+
+// Map cmpne(Rss) -> !cmpeq(Rss).
+// rs != rt -> !(rs == rt).
+def : Pat <(i1 (setne DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (NOT_Ps(i1 (CMPEHexagon4rr DoubleRegs:$src1, DoubleRegs:$src2))))>;
+
+// Map cmpge(Rs, Rt) -> !cmpgt(Rt, Rs).
+// rs >= rt -> !(rt > rs).
+def : Pat <(i1 (setge IntRegs:$src1, IntRegs:$src2)),
+ (i1 (NOT_Ps(i1 (CMPGTrr IntRegs:$src2, IntRegs:$src1))))>;
+
+def : Pat <(i1 (setge IntRegs:$src1, s8ImmPred:$src2)),
+ (i1 (CMPGEri IntRegs:$src1, s8ImmPred:$src2))>;
+
+// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss).
+// rss >= rtt -> !(rtt > rss).
+def : Pat <(i1 (setge DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (NOT_Ps(i1 (CMPGT64rr DoubleRegs:$src2, DoubleRegs:$src1))))>;
+
+// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm).
+// rs < rt -> !(rs >= rt).
+def : Pat <(i1 (setlt IntRegs:$src1, s8ImmPred:$src2)),
+ (i1 (NOT_Ps (CMPGEri IntRegs:$src1, s8ImmPred:$src2)))>;
+
+// Map cmplt(Rs, Rt) -> cmplt(Rs, Rt).
+// rs < rt -> rs < rt. Let the assembler map it.
+def : Pat <(i1 (setlt IntRegs:$src1, IntRegs:$src2)),
+ (i1 (CMPLTrr IntRegs:$src2, IntRegs:$src1))>;
+
+// Map cmplt(Rss, Rtt) -> cmpgt(Rtt, Rss).
+// rss < rtt -> (rtt > rss).
+def : Pat <(i1 (setlt DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (CMPGT64rr DoubleRegs:$src2, DoubleRegs:$src1))>;
+
+// Map from cmpltu(Rs, Rt) -> cmpgtu(Rt, Rs).
+// rs < rt -> rt > rs.
+def : Pat <(i1 (setult IntRegs:$src1, IntRegs:$src2)),
+ (i1 (CMPGTUrr IntRegs:$src2, IntRegs:$src1))>;
+
+// Map from cmpltu(Rss, Rdd) -> cmpgtu(Rdd, Rss).
+// rs < rt -> rt > rs.
+def : Pat <(i1 (setult DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1))>;
+
+// Map from Rs >= Rt -> !(Rt > Rs).
+// rs >= rt -> !(rt > rs).
+def : Pat <(i1 (setuge IntRegs:$src1, IntRegs:$src2)),
+ (i1 (NOT_Ps (CMPGTUrr IntRegs:$src2, IntRegs:$src1)))>;
+
+// Map from Rs >= Rt -> !(Rt > Rs).
+// rs >= rt -> !(rt > rs).
+def : Pat <(i1 (setuge DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (NOT_Ps (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1)))>;
+
+// Map from cmpleu(Rs, Rt) -> !cmpgtu(Rs, Rt).
+// Map from (Rs <= Rt) -> !(Rs > Rt).
+def : Pat <(i1 (setule IntRegs:$src1, IntRegs:$src2)),
+ (i1 (NOT_Ps (CMPGTUrr IntRegs:$src1, IntRegs:$src2)))>;
+
+// Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt).
+// Map from (Rs <= Rt) -> !(Rs > Rt).
+def : Pat <(i1 (setule DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (NOT_Ps (CMPGTU64rr DoubleRegs:$src1, DoubleRegs:$src2)))>;
+
+// Sign extends.
+// i1 -> i32
+def : Pat <(i32 (sext PredRegs:$src1)),
+ (i32 (MUX_ii PredRegs:$src1, -1, 0))>;
+
+// Convert sign-extended load back to load and sign extend.
+// i8 -> i64
+def: Pat <(i64 (sextloadi8 ADDRriS11_0:$src1)),
+ (i64 (SXTW (LDrib ADDRriS11_0:$src1)))>;
+
+// Convert any-extended load back to load and sign extend.
+// i8 -> i64
+def: Pat <(i64 (extloadi8 ADDRriS11_0:$src1)),
+ (i64 (SXTW (LDrib ADDRriS11_0:$src1)))>;
+
+// Convert sign-extended load back to load and sign extend.
+// i16 -> i64
+def: Pat <(i64 (sextloadi16 ADDRriS11_1:$src1)),
+ (i64 (SXTW (LDrih ADDRriS11_1:$src1)))>;
+
+// Convert sign-extended load back to load and sign extend.
+// i32 -> i64
+def: Pat <(i64 (sextloadi32 ADDRriS11_2:$src1)),
+ (i64 (SXTW (LDriw ADDRriS11_2:$src1)))>;
+
+
+// Zero extends.
+// i1 -> i32
+def : Pat <(i32 (zext PredRegs:$src1)),
+ (i32 (MUX_ii PredRegs:$src1, 1, 0))>;
+
+// i1 -> i64
+def : Pat <(i64 (zext PredRegs:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (MUX_ii PredRegs:$src1, 1, 0)))>;
+
+// i32 -> i64
+def : Pat <(i64 (zext IntRegs:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), IntRegs:$src1))>;
+
+// i8 -> i64
+def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>;
+
+// i16 -> i64
+def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>;
+
+// i32 -> i64
+def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>;
+
+def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)),
+ (i32 (LDriw ADDRriS11_0:$src1))>;
+
+// Map from Rd = Pd to Rd = mux(Pd, #1, #0).
+def : Pat <(i32 (zext PredRegs:$src1)),
+ (i32 (MUX_ii PredRegs:$src1, 1, 0))>;
+
+// Map from Rd = Pd to Rd = mux(Pd, #1, #0).
+def : Pat <(i32 (anyext PredRegs:$src1)),
+ (i32 (MUX_ii PredRegs:$src1, 1, 0))>;
+
+// Map from Rdd = Pd to Rdd = sxtw(mux(Pd, #1, #0)).
+def : Pat <(i64 (anyext PredRegs:$src1)),
+ (i64 (SXTW (i32 (MUX_ii PredRegs:$src1, 1, 0))))>;
+
+
+// Any extended 64-bit load.
+// anyext i32 -> i64
+def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>;
+
+// anyext i16 -> i64.
+def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>;
+
+// Map from Rdd = zxtw(Rs) -> Rdd = combine(0, Rs).
+def : Pat<(i64 (zext IntRegs:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), IntRegs:$src1))>;
+
+// Multiply 64-bit unsigned and use upper result.
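+// A sketch of the decomposition that the unsigned pattern below builds on
+// (the signed variant that follows has the same shape); carry handling is
+// elided in this note:
+//   a = a_hi * 2^32 + a_lo,  b = b_hi * 2^32 + b_lo
+//   a * b = a_hi*b_hi * 2^64 + (a_hi*b_lo + a_lo*b_hi) * 2^32 + a_lo*b_lo
+// so the upper 64 bits are a_hi*b_hi plus the bits carried out of the cross
+// products and of a_lo*b_lo; the accumulating multiplies and 32-bit shifts
+// below compute those terms from the 32-bit halves of the operands.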
+def : Pat <(mulhu DoubleRegs:$src1, DoubleRegs:$src2),
+ (MPYU64_acc(COMBINE_rr (TFRI 0),
+ (EXTRACT_SUBREG
+ (LSRd_ri(MPYU64_acc(MPYU64_acc(COMBINE_rr (TFRI 0),
+ (EXTRACT_SUBREG (LSRd_ri(MPYU64
+ (EXTRACT_SUBREG DoubleRegs:$src1,
+ subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2,
+ subreg_loreg)),
+ 32) ,subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1,
+ subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src2,
+ subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)),
+ 32),subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)
+ )>;
+
+// Multiply 64-bit signed and use upper result.
+def : Pat <(mulhs DoubleRegs:$src1, DoubleRegs:$src2),
+ (MPY64_acc(COMBINE_rr (TFRI 0),
+ (EXTRACT_SUBREG
+ (LSRd_ri(MPY64_acc(MPY64_acc(COMBINE_rr (TFRI 0),
+ (EXTRACT_SUBREG (LSRd_ri(MPYU64
+ (EXTRACT_SUBREG DoubleRegs:$src1,
+ subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2,
+ subreg_loreg)),
+ 32) ,subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1,
+ subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src2,
+ subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)),
+ 32),subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)
+ )>;
+
+// Hexagon specific ISD nodes.
+def SDTHexagonADJDYNALLOC : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>]>;
+def Hexagon_ADJDYNALLOC : SDNode<"HexagonISD::ADJDYNALLOC",
+ SDTHexagonADJDYNALLOC>;
+// Needed to tag these instructions for stack layout.
+let usesCustomInserter = 1 in
+def ADJDYNALLOC : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1,
+ s16Imm:$src2),
+ "$dst = add($src1, #$src2)",
+ [(set IntRegs:$dst, (Hexagon_ADJDYNALLOC IntRegs:$src1,
+ s16ImmPred:$src2))]>;
+
+def SDTHexagonARGEXTEND : SDTypeProfile<1, 1, []>;
+def Hexagon_ARGEXTEND : SDNode<"HexagonISD::ARGEXTEND", SDTHexagonARGEXTEND>;
+def ARGEXTEND : ALU32_rr <(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = $src1",
+ [(set IntRegs:$dst, (Hexagon_ARGEXTEND IntRegs:$src1))]>;
+
+let AddedComplexity = 100 in
+def : Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND IntRegs:$src1), i16)),
+ (TFR IntRegs:$src1)>;
+
+
+def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>;
+
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
+def BR_JT : JRInst<(outs), (ins IntRegs:$src),
+ "jumpr $src",
+ [(HexagonBR_JT IntRegs:$src)]>;
+def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>;
+
+def : Pat<(HexagonWrapperJT tjumptable:$dst),
+ (CONST32_set_jt tjumptable:$dst)>;
+
+
+//===----------------------------------------------------------------------===//
+// V3 Instructions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrInfoV3.td"
+
+//===----------------------------------------------------------------------===//
+// V3 Instructions -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// V4 Instructions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrInfoV4.td"
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV3.td b/lib/Target/Hexagon/HexagonInstrInfoV3.td
new file mode 100644
index 0000000..a73897e
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfoV3.td
@@ -0,0 +1,134 @@
+//=- HexagonInstrInfoV3.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V3 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// J +
+//===----------------------------------------------------------------------===//
+// Call subroutine.
+let isCall = 1, neverHasSideEffects = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31,
+ P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def CALLv3 : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "call $dst", []>, Requires<[HasV3T]>;
+}
+
+//===----------------------------------------------------------------------===//
+// J -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// JR +
+//===----------------------------------------------------------------------===//
+// Call subroutine from register.
+let isCall = 1, neverHasSideEffects = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31,
+ P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def CALLRv3 : JRInst<(outs), (ins IntRegs:$dst, variable_ops),
+ "callr $dst",
+ []>, Requires<[HasV3TOnly]>;
+ }
+
+
+// if(p?.new) jumpr:t r?
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cPnewt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) jumpr:t $src2",
+ []>, Requires<[HasV3T]>;
+}
+
+// if (!p?.new) jumpr:t r?
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cNotPnewt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) jumpr:t $src2",
+ []>, Requires<[HasV3T]>;
+}
+
+// Not taken.
+// if(p?.new) jumpr:nt r?
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cPnewNt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) jumpr:nt $src2",
+ []>, Requires<[HasV3T]>;
+}
+
+// if (!p?.new) jumpr:nt r?
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cNotPnewNt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) jumpr:nt $src2",
+ []>, Requires<[HasV3T]>;
+}
+
+//===----------------------------------------------------------------------===//
+// JR -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/ALU +
+//===----------------------------------------------------------------------===//
+
+let AddedComplexity = 200 in
+def MAXw_dd : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = max($src2, $src1)",
+ [(set DoubleRegs:$dst, (select (i1 (setlt DoubleRegs:$src2,
+ DoubleRegs:$src1)),
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))]>,
+Requires<[HasV3T]>;
+
+let AddedComplexity = 200 in
+def MINw_dd : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = min($src2, $src1)",
+ [(set DoubleRegs:$dst, (select (i1 (setgt DoubleRegs:$src2,
+ DoubleRegs:$src1)),
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))]>,
+Requires<[HasV3T]>;
+
+//===----------------------------------------------------------------------===//
+// ALU64/ALU -
+//===----------------------------------------------------------------------===//
+
+
+
+
+//def : Pat <(brcond (i1 (seteq IntRegs:$src1, 0)), bb:$offset),
+// (JMP_RegEzt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>;
+
+//def : Pat <(brcond (i1 (setne IntRegs:$src1, 0)), bb:$offset),
+// (JMP_RegNzt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>;
+
+//def : Pat <(brcond (i1 (setle IntRegs:$src1, 0)), bb:$offset),
+// (JMP_RegLezt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>;
+
+//def : Pat <(brcond (i1 (setge IntRegs:$src1, 0)), bb:$offset),
+// (JMP_RegGezt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>;
+
+//def : Pat <(brcond (i1 (setgt IntRegs:$src1, -1)), bb:$offset),
+// (JMP_RegGezt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>;
+
+
+// Map call instruction
+def : Pat<(call IntRegs:$dst),
+ (CALLRv3 IntRegs:$dst)>, Requires<[HasV3T]>;
+def : Pat<(call tglobaladdr:$dst),
+ (CALLv3 tglobaladdr:$dst)>, Requires<[HasV3T]>;
+def : Pat<(call texternalsym:$dst),
+ (CALLv3 texternalsym:$dst)>, Requires<[HasV3T]>;
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
new file mode 100644
index 0000000..24218d0
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -0,0 +1,3392 @@
+//=- HexagonInstrInfoV4.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V4 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+// The Hexagon V4 architecture spec defines the following instruction classes:
+// LD ST ALU32 XTYPE J JR MEMOP NV CR SYSTEM (SYSTEM is not implemented in
+// the compiler)
+
+// LD Instructions:
+// ========================================
+// Loads (8/16/32/64 bit)
+// Deallocframe
+
+// ST Instructions:
+// ========================================
+// Stores (8/16/32/64 bit)
+// Allocframe
+
+// ALU32 Instructions:
+// ========================================
+// Arithmetic / Logical (32 bit)
+// Vector Halfword
+
+// XTYPE Instructions (32/64 bit):
+// ========================================
+// Arithmetic, Logical, Bit Manipulation
+// Multiply (Integer, Fractional, Complex)
+// Permute / Vector Permute Operations
+// Predicate Operations
+// Shift / Shift with Add/Sub/Logical
+// Vector Byte ALU
+// Vector Halfword (ALU, Shift, Multiply)
+// Vector Word (ALU, Shift)
+
+// J Instructions:
+// ========================================
+// Jump/Call PC-relative
+
+// JR Instructions:
+// ========================================
+// Jump/Call Register
+
+// MEMOP Instructions:
+// ========================================
+// Operation on memory (8/16/32 bit)
+
+// NV Instructions:
+// ========================================
+// New-value Jumps
+// New-value Stores
+
+// CR Instructions:
+// ========================================
+// Control-Register Transfers
+// Hardware Loop Setup
+// Predicate Logicals & Reductions
+
+// SYSTEM Instructions (not implemented in the compiler):
+// ========================================
+// Prefetch
+// Cache Maintenance
+// Bus Operations
+
+
+//===----------------------------------------------------------------------===//
+// ALU32 +
+//===----------------------------------------------------------------------===//
+
+// Shift halfword.
+
+def ASLH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = aslh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def ASLH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = aslh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def ASLH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = aslh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def ASLH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = aslh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def ASRH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = asrh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def ASRH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = asrh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def ASRH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = asrh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def ASRH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = asrh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Sign extend.
+
+def SXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = sxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def SXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = sxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def SXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = sxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def SXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = sxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+def SXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = sxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def SXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = sxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def SXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = sxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def SXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = sxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Zero extend.
+
+let neverHasSideEffects = 1 in
+def ZXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = zxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = zxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = zxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = zxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = zxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = zxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = zxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = zxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+//===----------------------------------------------------------------------===//
+// ALU32 -
+//===----------------------------------------------------------------------===//
+
+
+
+//===----------------------------------------------------------------------===//
+// LD +
+//===----------------------------------------------------------------------===//
+///
+/// Make sure that in post increment load, the first operand is always the post
+/// increment operand.
+///
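+// (In the POST_LD* definitions further below, the updated base register appears
+// in the outs list, e.g. IntRegs:$dst2, and is tied back to the incoming base
+// with a "$src2 = $dst2" constraint.)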
+//// Load doubleword.
+// Rdd=memd(Re=#U6)
+
+// Rdd=memd(Rs+Rt<<#u2)
+// Special-case pattern for an indexed load without offset, which is easier to
+// match. The AddedComplexity of this pattern should be lower than that of the
+// base+offset load and lower still than the more generic version with
+// offset/shift below. A similar approach is taken for all other base+index
+// loads.
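+// Illustrative sketch of how this plays out during instruction selection:
+// higher-AddedComplexity patterns are tried first, so a DAG such as
+//   (load (add Rs, (shl Rt, 2)))
+// selects the shifted form below (AddedComplexity = 40), while a plain
+//   (load (add Rs, Rt))
+// falls back to the non-shifted form below (AddedComplexity = 10).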
+let AddedComplexity = 10, isPredicable = 1 in
+def LDrid_indexed_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memd($src1+$src2<<#0)",
+ [(set DoubleRegs:$dst, (load (add IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDrid_indexed_shl_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memd($src1+$src2<<#$offset)",
+ [(set DoubleRegs:$dst, (load (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$offset))))]>,
+ Requires<[HasV4T]>;
+
+//// Load doubleword conditionally.
+// if ([!]Pv[.new]) Rdd=memd(Rs+Rt<<#u2)
+// if (Pv) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrid_indexed_cPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memd($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrid_indexed_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memd($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrid_indexed_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memd($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrid_indexed_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memd($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrid_indexed_shl_cPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memd($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrid_indexed_shl_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memd($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrid_indexed_shl_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memd($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrid_indexed_shl_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memd($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rdd=memd(Rt<<#u2+#U6)
+
+//// Load byte.
+// Rd=memb(Re=#U6)
+
+// Rd=memb(Rs+Rt<<#u2)
+let AddedComplexity = 10, isPredicable = 1 in
+def LDrib_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memb($src1+$src2<<#0)",
+ [(set IntRegs:$dst, (sextloadi8 (add IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def LDriub_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memub($src1+$src2<<#0)",
+ [(set IntRegs:$dst, (zextloadi8 (add IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def LDriub_ae_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memub($src1+$src2<<#0)",
+ [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDrib_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memb($src1+$src2<<#$offset)",
+ [(set IntRegs:$dst,
+ (sextloadi8 (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$offset))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDriub_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memub($src1+$src2<<#$offset)",
+ [(set IntRegs:$dst,
+ (zextloadi8 (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$offset))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDriub_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memub($src1+$src2<<#$offset)",
+ [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$offset))))]>,
+ Requires<[HasV4T]>;
+
+//// Load byte conditionally.
+// if ([!]Pv[.new]) Rd=memb(Rs+Rt<<#u2)
+// if (Pv) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrib_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memb($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrib_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memb($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrib_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memb($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrib_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memb($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrib_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memb($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrib_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memb($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrib_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memb($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrib_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memb($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+//// Load unsigned byte conditionally.
+// if ([!]Pv[.new]) Rd=memub(Rs+Rt<<#u2)
+// if (Pv) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriub_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memub($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriub_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memub($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriub_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memub($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriub_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memub($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriub_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memub($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriub_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memub($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriub_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memub($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriub_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memub($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memb(Rt<<#u2+#U6)
+
+//// Load halfword
+// Rd=memh(Re=#U6)
+
+// Rd=memh(Rs+Rt<<#u2)
+let AddedComplexity = 10, isPredicable = 1 in
+def LDrih_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memh($src1+$src2<<#0)",
+ [(set IntRegs:$dst, (sextloadi16 (add IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def LDriuh_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memuh($src1+$src2<<#0)",
+ [(set IntRegs:$dst, (zextloadi16 (add IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def LDriuh_ae_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memuh($src1+$src2<<#0)",
+ [(set IntRegs:$dst, (extloadi16 (add IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+// Rd=memh(Rs+Rt<<#u2)
+let AddedComplexity = 40, isPredicable = 1 in
+def LDrih_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memh($src1+$src2<<#$offset)",
+ [(set IntRegs:$dst,
+ (sextloadi16 (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$offset))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDriuh_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memuh($src1+$src2<<#$offset)",
+ [(set IntRegs:$dst,
+ (zextloadi16 (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$offset))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDriuh_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memuh($src1+$src2<<#$offset)",
+ [(set IntRegs:$dst,
+ (extloadi16 (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$offset))))]>,
+ Requires<[HasV4T]>;
+
+//// Load halfword conditionally.
+// if ([!]Pv[.new]) Rd=memh(Rs+Rt<<#u2)
+// if (Pv) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrih_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrih_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrih_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrih_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrih_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrih_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrih_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrih_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+//// Load unsigned halfword conditionally.
+// if ([!]Pv[.new]) Rd=memuh(Rs+Rt<<#u2)
+// if (Pv) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriuh_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memuh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriuh_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memuh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriuh_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memuh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriuh_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memuh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriuh_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memuh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriuh_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memuh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriuh_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memuh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriuh_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memuh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memh(Rt<<#u2+#U6)
+
+//// Load word.
+// Rd=memw(Re=#U6)
+
+// Rd=memw(Rs+Rt<<#u2)
+let AddedComplexity = 10, isPredicable = 1 in
+def LDriw_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memw($src1+$src2<<#0)",
+ [(set IntRegs:$dst, (load (add IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+// Rd=memw(Rs+Rt<<#u2)
+let AddedComplexity = 40, isPredicable = 1 in
+def LDriw_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memw($src1+$src2<<#$offset)",
+ [(set IntRegs:$dst, (load (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$offset))))]>,
+ Requires<[HasV4T]>;
+
+//// Load word conditionally.
+// if ([!]Pv[.new]) Rd=memw(Rs+Rt<<#u2)
+// if (Pv) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriw_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memw($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriw_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memw($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriw_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memw($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriw_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memw($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriw_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memw($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriw_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memw($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriw_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memw($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriw_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memw($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memw(Rt<<#u2+#U6)
+
+
+// Post-inc Load, Predicated, Dot new
+
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrid_cdnPt_V4 : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
+ "if ($src1.new) $dst1 = memd($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrid_cdnNotPt_V4 : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
+ "if (!$src1.new) $dst1 = memd($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrib_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if ($src1.new) $dst1 = memb($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrib_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if (!$src1.new) $dst1 = memb($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrih_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if ($src1.new) $dst1 = memh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrih_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if (!$src1.new) $dst1 = memh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriub_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if ($src1.new) $dst1 = memub($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriub_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if (!$src1.new) $dst1 = memub($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriuh_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if ($src1.new) $dst1 = memuh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriuh_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if (!$src1.new) $dst1 = memuh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriw_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
+ "if ($src1.new) $dst1 = memw($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriw_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
+ "if (!$src1.new) $dst1 = memw($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+
+//===----------------------------------------------------------------------===//
+// LD -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ST +
+//===----------------------------------------------------------------------===//
+///
+/// Assumptions: ****** DO NOT IGNORE ********
+/// 1. Make sure that in a post increment store, the zero'th operand is always
+///    the post increment operand.
+/// 2. Make sure that the store value operand (Rt/Rtt) in a store is always the
+///    last operand.
+///
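+// (Both conventions are visible in the definitions below: ordinary stores list
+// the value last, e.g. DoubleRegs:$src4 in STrid_indexed_shl_V4, while the
+// POST_ST* forms return the updated base as (outs IntRegs:$dst), tied to its
+// input with a "$src3 = $dst" constraint.)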
+
+// Store doubleword.
+// memd(Re=#U6)=Rtt
+// TODO: needs to be implemented
+
+// memd(Rs+#s11:3)=Rtt
+// memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, isPredicable = 1 in
+def STrid_indexed_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, DoubleRegs:$src4),
+ "memd($src1+$src2<<#$src3) = $src4",
+ [(store DoubleRegs:$src4, (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// memd(Ru<<#u2+#U6)=Rtt
+let AddedComplexity = 10 in
+def STrid_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, DoubleRegs:$src4),
+ "memd($src1<<#$src2+#$src3) = $src4",
+ [(store DoubleRegs:$src4, (shl IntRegs:$src1,
+ (add u2ImmPred:$src2,
+ u6ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// memd(Rx++#s4:3)=Rtt
+// memd(Rx++#s4:3:circ(Mu))=Rtt
+// memd(Rx++I:circ(Mu))=Rtt
+// memd(Rx++Mu)=Rtt
+// memd(Rx++Mu:brev)=Rtt
+// memd(gp+#u16:3)=Rtt
+
+// Store doubleword conditionally.
+// if ([!]Pv[.new]) memd(#u6)=Rtt
+// TODO: needs to be implemented.
+
+// if ([!]Pv[.new]) memd(Rs+#u6:3)=Rtt
+// if (Pv) memd(Rs+#u6:3)=Rtt
+// if (Pv.new) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
+ "if ($src1.new) memd($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memd(Rs+#u6:3)=Rtt
+// if (!Pv.new) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
+ "if (!$src1.new) memd($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memd(Rs+#u6:3)=Rtt
+// if (Pv.new) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
+ DoubleRegs:$src4),
+ "if ($src1.new) memd($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memd(Rs+#u6:3)=Rtt
+// if (!Pv.new) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
+ DoubleRegs:$src4),
+ "if (!$src1.new) memd($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memd(Rs+Ru<<#u2)=Rtt
+// if (Pv) memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_shl_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ DoubleRegs:$src5),
+ "if ($src1) memd($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_shl_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ DoubleRegs:$src5),
+ "if ($src1) memd($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+// if (!Pv) memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_shl_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ DoubleRegs:$src5),
+ "if (!$src1) memd($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+// if (!Pv.new) memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_shl_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ DoubleRegs:$src5),
+ "if (!$src1.new) memd($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memd(Rx++#s4:3)=Rtt
+// if (Pv) memd(Rx++#s4:3)=Rtt
+// if (Pv.new) memd(Rx++#s4:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def POST_STdri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
+ s4_3Imm:$offset),
+ "if ($src1.new) memd($src3++#$offset) = $src2",
+ [],
+ "$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memd(Rx++#s4:3)=Rtt
+// if (!Pv.new) memd(Rx++#s4:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def POST_STdri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
+ s4_3Imm:$offset),
+ "if (!$src1.new) memd($src3++#$offset) = $src2",
+ [],
+ "$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Store byte.
+// memb(Re=#U6)=Rt
+// TODO: needs to be implemented.
+// memb(Rs+#s11:0)=Rt
+// memb(Rs+#u6:0)=#S8
+let AddedComplexity = 10, isPredicable = 1 in
+def STrib_imm_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u6_0Imm:$src2, s8Imm:$src3),
+ "memb($src1+#$src2) = #$src3",
+ [(truncstorei8 s8ImmPred:$src3, (add IntRegs:$src1,
+ u6_0ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// memb(Rs+Ru<<#u2)=Rt
+let AddedComplexity = 10, isPredicable = 1 in
+def STrib_indexed_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memb($src1+$src2<<#$src3) = $src4",
+ [(truncstorei8 IntRegs:$src4, (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// memb(Ru<<#u2+#U6)=Rt
+let AddedComplexity = 10 in
+def STrib_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memb($src1<<#$src2+#$src3) = $src4",
+ [(truncstorei8 IntRegs:$src4, (shl IntRegs:$src1,
+ (add u2ImmPred:$src2,
+ u6ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// memb(Rx++#s4:0:circ(Mu))=Rt
+// memb(Rx++I:circ(Mu))=Rt
+// memb(Rx++Mu)=Rt
+// memb(Rx++Mu:brev)=Rt
+// memb(gp+#u16:0)=Rt
+
+
+// Store byte conditionally.
+// if ([!]Pv[.new]) memb(#u6)=Rt
+// if ([!]Pv[.new]) memb(Rs+#u6:0)=#S6
+// if (Pv) memb(Rs+#u6:0)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_imm_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
+ "if ($src1) memb($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rs+#u6:0)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_imm_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
+ "if ($src1.new) memb($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+#u6:0)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_imm_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
+ "if (!$src1) memb($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+#u6:0)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_imm_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
+ "if (!$src1.new) memb($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memb(Rs+#u6:0)=Rt
+// if (Pv) memb(Rs+#u6:0)=Rt
+// if (Pv.new) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memb($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+#u6:0)=Rt
+// if (!Pv.new) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memb($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memb(Rs+#u6:0)=Rt
+// if (!Pv) memb(Rs+#u6:0)=Rt
+// if (Pv.new) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memb($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memb($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memb(Rs+Ru<<#u2)=Rt
+// if (Pv) memb(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_indexed_shl_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memb($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_indexed_shl_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memb($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_indexed_shl_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memb($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_indexed_shl_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memb($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memb(Rx++#s4:0)=Rt
+// if (Pv) memb(Rx++#s4:0)=Rt
+// if (Pv.new) memb(Rx++#s4:0)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STbri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if ($src1.new) memb($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rx++#s4:0)=Rt
+// if (!Pv.new) memb(Rx++#s4:0)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STbri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if (!$src1.new) memb($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Store halfword.
+// memh(Re=#U6)=Rt.H
+// TODO: needs to be implemented
+
+// memh(Re=#U6)=Rt
+// TODO: needs to be implemented
+
+// memh(Rs+#s11:1)=Rt.H
+// memh(Rs+#s11:1)=Rt
+// memh(Rs+#u6:1)=#S8
+let AddedComplexity = 10, isPredicable = 1 in
+def STrih_imm_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u6_1Imm:$src2, s8Imm:$src3),
+ "memh($src1+#$src2) = #$src3",
+ [(truncstorei16 s8ImmPred:$src3, (add IntRegs:$src1,
+ u6_1ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// memh(Rs+Ru<<#u2)=Rt.H
+// TODO: needs to be implemented.
+
+// memh(Rs+Ru<<#u2)=Rt
+let AddedComplexity = 10, isPredicable = 1 in
+def STrih_indexed_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memh($src1+$src2<<#$src3) = $src4",
+ [(truncstorei16 IntRegs:$src4, (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// memh(Ru<<#u2+#U6)=Rt.H
+// memh(Ru<<#u2+#U6)=Rt
+let AddedComplexity = 10 in
+def STrih_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memh($src1<<#$src2+#$src3) = $src4",
+ [(truncstorei16 IntRegs:$src4, (shl IntRegs:$src1,
+ (add u2ImmPred:$src2,
+ u6ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// memh(Rx++#s4:1:circ(Mu))=Rt.H
+// memh(Rx++#s4:1:circ(Mu))=Rt
+// memh(Rx++I:circ(Mu))=Rt.H
+// memh(Rx++I:circ(Mu))=Rt
+// memh(Rx++Mu)=Rt.H
+// memh(Rx++Mu)=Rt
+// memh(Rx++Mu:brev)=Rt.H
+// memh(Rx++Mu:brev)=Rt
+// memh(gp+#u16:1)=Rt.H
+// memh(gp+#u16:1)=Rt
+
+
+// Store halfword conditionally.
+// if ([!]Pv[.new]) memh(#u6)=Rt.H
+// if ([!]Pv[.new]) memh(#u6)=Rt
+
+// if ([!]Pv[.new]) memh(Rs+#u6:1)=#S6
+// if (Pv) memh(Rs+#u6:1)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_imm_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
+ "if ($src1) memh($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+#u6:1)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_imm_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
+ "if ($src1.new) memh($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+#u6:1)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_imm_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
+ "if (!$src1) memh($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+#u6:1)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_imm_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
+ "if (!$src1.new) memh($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt.H
+// TODO: needs to be implemented.
+
+// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt
+// if (Pv) memh(Rs+#u6:1)=Rt
+// if (Pv.new) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memh($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+#u6:1)=Rt
+// if (!Pv.new) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memh($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memh($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memh($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Rt.H
+// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Rt
+// if (Pv) memh(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memh($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memh($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memh($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memh($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt.H
+// TODO: Needs to be implemented.
+
+// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt
+// if (Pv) memh(Rx++#s4:1)=Rt
+// if (Pv.new) memh(Rx++#s4:1)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if ($src1.new) memh($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rx++#s4:1)=Rt
+// if (!Pv.new) memh(Rx++#s4:1)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if (!$src1.new) memh($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Store word.
+// memw(Re=#U6)=Rt
+// TODO: Needs to be implemented.
+
+// memw(Rs+#s11:2)=Rt
+// memw(Rs+#u6:2)=#S8
+let AddedComplexity = 10, isPredicable = 1 in
+def STriw_imm_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u6_2Imm:$src2, s8Imm:$src3),
+ "memw($src1+#$src2) = #$src3",
+ [(store s8ImmPred:$src3, (add IntRegs:$src1, u6_2ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// memw(Rs+Ru<<#u2)=Rt
+let AddedComplexity = 10, isPredicable = 1 in
+def STriw_indexed_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memw($src1+$src2<<#$src3) = $src4",
+ [(store IntRegs:$src4, (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// memw(Ru<<#u2+#U6)=Rt
+let AddedComplexity = 10 in
+def STriw_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memw($src1<<#$src2+#$src3) = $src4",
+ [(store IntRegs:$src4, (shl IntRegs:$src1,
+ (add u2ImmPred:$src2, u6ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// memw(Rx++#s4:2)=Rt
+// memw(Rx++#s4:2:circ(Mu))=Rt
+// memw(Rx++I:circ(Mu))=Rt
+// memw(Rx++Mu)=Rt
+// memw(Rx++Mu:brev)=Rt
+// memw(gp+#u16:2)=Rt
+
+
+// Store word conditionally.
+// if ([!]Pv[.new]) memw(#u6)=Rt
+// TODO: Needs to be implemented.
+
+// if ([!]Pv[.new]) memw(Rs+#u6:2)=#S6
+// if (Pv) memw(Rs+#u6:2)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_imm_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
+ "if ($src1) memw($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(Rs+#u6:2)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_imm_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
+ "if ($src1.new) memw($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rs+#u6:2)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_imm_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
+ "if (!$src1) memw($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rs+#u6:2)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_imm_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
+ "if (!$src1.new) memw($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memw(Rs+#u6:2)=Rt
+// if (Pv) memw(Rs+#u6:2)=Rt
+// if (Pv.new) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memw($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rs+#u6:2)=Rt
+// if (!Pv.new) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memw($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memw(Rs+#u6:2)=Rt
+// if (!Pv) memw(Rs+#u6:2)=Rt
+// if (Pv.new) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memw($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memw($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memw(Rs+Ru<<#u2)=Rt
+// if (Pv) memw(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_indexed_shl_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memw($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_indexed_shl_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memw($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_indexed_shl_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memw($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_indexed_shl_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memw($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memw(Rx++#s4:2)=Rt
+// if (Pv) memw(Rx++#s4:2)=Rt
+// if (Pv.new) memw(Rx++#s4:2)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STwri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if ($src1.new) memw($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rx++#s4:2)=Rt
+// if (!Pv.new) memw(Rx++#s4:2)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STwri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if (!$src1.new) memw($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+//===----------------------------------------------------------------------===//
+// ST -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// NV/ST +
+//===----------------------------------------------------------------------===//
+
+// Store new-value byte.
+
+// memb(Re=#U6)=Nt.new
+// memb(Rs+#s11:0)=Nt.new
+let mayStore = 1, isPredicable = 1 in
+def STrib_nv_V4 : NVInst_V4<(outs), (ins MEMri:$addr, IntRegs:$src1),
+ "memb($addr) = $src1.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, isPredicable = 1 in
+def STrib_indexed_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, s11_0Imm:$src2, IntRegs:$src3),
+ "memb($src1+#$src2) = $src3.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in
+def STrib_indexed_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memb($src1+$src2<<#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(Ru<<#u2+#U6)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memb($src1<<#$src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(Rx++#s4:0)=Nt.new
+let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in
+def POST_STbri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s4_0Imm:$offset),
+ "memb($src2++#$offset) = $src1.new",
+ [],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// memb(Rx++#s4:0:circ(Mu))=Nt.new
+// memb(Rx++I:circ(Mu))=Nt.new
+// memb(Rx++Mu)=Nt.new
+// memb(Rx++Mu:brev)=Nt.new
+
+// memb(gp+#u16:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_GP_nv_V4 : NVInst_V4<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memb(#$global+$offset) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// Store new-value byte conditionally.
+// if ([!]Pv[.new]) memb(#u6)=Nt.new
+// if (Pv) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memb($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memb($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memb($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memb($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if ($src1) memb($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memb($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memb($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memb($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// if ([!]Pv[.new]) memb(Rs+Ru<<#u2)=Nt.new
+// if (Pv) memb(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memb($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memb($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memb($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memb($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memb(Rx++#s4:0)=Nt.new
+// if (Pv) memb(Rx++#s4:0)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STbri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if ($src1) memb($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rx++#s4:0)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STbri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if ($src1.new) memb($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rx++#s4:0)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STbri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if (!$src1) memb($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rx++#s4:0)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STbri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if (!$src1.new) memb($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Store new-value halfword.
+// memh(Re=#U6)=Nt.new
+// memh(Rs+#s11:1)=Nt.new
+let mayStore = 1, isPredicable = 1 in
+def STrih_nv_V4 : NVInst_V4<(outs), (ins MEMri:$addr, IntRegs:$src1),
+ "memh($addr) = $src1.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, isPredicable = 1 in
+def STrih_indexed_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, s11_1Imm:$src2, IntRegs:$src3),
+ "memh($src1+#$src2) = $src3.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in
+def STrih_indexed_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memh($src1+$src2<<#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memh(Ru<<#u2+#U6)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memh($src1<<#$src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memh(Rx++#s4:1)=Nt.new
+let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in
+def POST_SThri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s4_1Imm:$offset),
+ "memh($src2++#$offset) = $src1.new",
+ [],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// memh(Rx++#s4:1:circ(Mu))=Nt.new
+// memh(Rx++I:circ(Mu))=Nt.new
+// memh(Rx++Mu)=Nt.new
+// memh(Rx++Mu:brev)=Nt.new
+
+// memh(gp+#u16:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_GP_nv_V4 : NVInst_V4<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memh(#$global+$offset) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// Store new-value halfword conditionally.
+
+// if ([!]Pv[.new]) memh(#u6)=Nt.new
+
+// if ([!]Pv[.new]) memh(Rs+#u6:1)=Nt.new
+// if (Pv) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memh($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memh($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memh($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memh($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if ($src1) memh($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memh($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memh($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memh($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Nt.new
+// if (Pv) memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memh($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memh($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memh($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memh($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rx++#s4:1)=Nt.new
+// if (Pv) memh(Rx++#s4:1)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if ($src1) memh($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rx++#s4:1)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if ($src1.new) memh($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rx++#s4:1)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if (!$src1) memh($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rx++#s4:1)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if (!$src1.new) memh($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Store new-value word.
+
+// memw(Re=#U6)=Nt.new
+// memw(Rs+#s11:2)=Nt.new
+let mayStore = 1, isPredicable = 1 in
+def STriw_nv_V4 : NVInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$src1),
+ "memw($addr) = $src1.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, isPredicable = 1 in
+def STriw_indexed_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3),
+ "memw($src1+#$src2) = $src3.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memw(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in
+def STriw_indexed_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memw($src1+$src2<<#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memw(Ru<<#u2+#U6)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memw($src1<<#$src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memw(Rx++#s4:2)=Nt.new
+let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in
+def POST_STwri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s4_2Imm:$offset),
+ "memw($src2++#$offset) = $src1.new",
+ [],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// memw(Rx++#s4:2:circ(Mu))=Nt.new
+// memw(Rx++I:circ(Mu))=Nt.new
+// memw(Rx++Mu)=Nt.new
+// memw(Rx++Mu:brev)=Nt.new
+// memw(gp+#u16:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_GP_nv_V4 : NVInst_V4<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memw(#$global+$offset) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// Store new-value word conditionally.
+
+// if ([!]Pv[.new]) memw(#u6)=Nt.new
+
+// if ([!]Pv[.new]) memw(Rs+#u6:2)=Nt.new
+// if (Pv) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memw($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memw($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memw($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memw($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if ($src1) memw($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memw($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memw($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memw($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// if ([!]Pv[.new]) memw(Rs+Ru<<#u2)=Nt.new
+// if (Pv) memw(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memw($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memw($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memw($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memw($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memw(Rx++#s4:2)=Nt.new
+// if (Pv) memw(Rx++#s4:2)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STwri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if ($src1) memw($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(Rx++#s4:2)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STwri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if ($src1.new) memw($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rx++#s4:2)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STwri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if (!$src1) memw($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rx++#s4:2)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STwri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if (!$src1.new) memw($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+//===----------------------------------------------------------------------===//
+// NV/ST -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/ALU +
+//===----------------------------------------------------------------------===//
+
+// Add and accumulate.
+// Rd=add(Rs,add(Ru,#s6))
+def ADDr_ADDri_V4 : MInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s6Imm:$src3),
+ "$dst = add($src1, add($src2, #$src3))",
+ [(set IntRegs:$dst,
+ (add IntRegs:$src1, (add IntRegs:$src2, s6ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// Rd=add(Rs,sub(#s6,Ru))
+def ADDr_SUBri_V4 : MInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
+ "$dst = add($src1, sub(#$src2, $src3))",
+ [(set IntRegs:$dst,
+ (add IntRegs:$src1, (sub s6ImmPred:$src2, IntRegs:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// Generates the same instruction as ADDr_SUBri_V4 but matches a different
+// pattern.
+// Rd=add(Rs,sub(#s6,Ru))
+def ADDri_SUBr_V4 : MInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
+ "$dst = add($src1, sub(#$src2, $src3))",
+ [(set IntRegs:$dst,
+ (sub (add IntRegs:$src1, s6ImmPred:$src2), IntRegs:$src3))]>,
+ Requires<[HasV4T]>;
+
+
+// Add or subtract doublewords with carry.
+//TODO:
+// Rdd=add(Rss,Rtt,Px):carry
+//TODO:
+// Rdd=sub(Rss,Rtt,Px):carry
+
+
+// Logical doublewords.
+// Rdd=and(Rtt,~Rss)
+def ANDd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ "$dst = and($src1, ~$src2)",
+ [(set DoubleRegs:$dst, (and DoubleRegs:$src1,
+ (not DoubleRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+// Rdd=or(Rtt,~Rss)
+def ORd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ "$dst = or($src1, ~$src2)",
+ [(set DoubleRegs:$dst,
+ (or DoubleRegs:$src1, (not DoubleRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+
+// Logical-logical doublewords.
+// Rxx^=xor(Rss,Rtt)
+def XORd_XORdd: MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ "$dst ^= xor($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (xor DoubleRegs:$src1, (xor DoubleRegs:$src2, DoubleRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Logical-logical words.
+// Rx=or(Ru,and(Rx,#s10))
+def ORr_ANDri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
+ "$dst = or($src1, and($src2, #$src3))",
+ [(set IntRegs:$dst,
+ (or IntRegs:$src1, (and IntRegs:$src2, s10ImmPred:$src3)))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx[&|^]=and(Rs,Rt)
+// Rx&=and(Rs,Rt)
+def ANDr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst &= and($src2, $src3)",
+ [(set IntRegs:$dst,
+ (and IntRegs:$src1, (and IntRegs:$src2, IntRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=and(Rs,Rt)
+def ORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst |= and($src2, $src3)",
+ [(set IntRegs:$dst,
+ (or IntRegs:$src1, (and IntRegs:$src2, IntRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx^=and(Rs,Rt)
+def XORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst ^= and($src2, $src3)",
+ [(set IntRegs:$dst,
+ (xor IntRegs:$src1, (and IntRegs:$src2, IntRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx[&|^]=and(Rs,~Rt)
+// Rx&=and(Rs,~Rt)
+def ANDr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst &= and($src2, ~$src3)",
+ [(set IntRegs:$dst,
+ (and IntRegs:$src1, (and IntRegs:$src2, (not IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=and(Rs,~Rt)
+def ORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst |= and($src2, ~$src3)",
+ [(set IntRegs:$dst,
+ (or IntRegs:$src1, (and IntRegs:$src2, (not IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx^=and(Rs,~Rt)
+def XORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst ^= and($src2, ~$src3)",
+ [(set IntRegs:$dst,
+ (xor IntRegs:$src1, (and IntRegs:$src2, (not IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx[&|^]=or(Rs,Rt)
+// Rx&=or(Rs,Rt)
+def ANDr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst &= or($src2, $src3)",
+ [(set IntRegs:$dst,
+ (and IntRegs:$src1, (or IntRegs:$src2, IntRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=or(Rs,Rt)
+def ORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst |= or($src2, $src3)",
+ [(set IntRegs:$dst,
+ (or IntRegs:$src1, (or IntRegs:$src2, IntRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx^=or(Rs,Rt)
+def XORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst ^= or($src2, $src3)",
+ [(set IntRegs:$dst,
+ (xor IntRegs:$src1, (or IntRegs:$src2, IntRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx[&|^]=xor(Rs,Rt)
+// Rx&=xor(Rs,Rt)
+def ANDr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst &= xor($src2, $src3)",
+ [(set IntRegs:$dst,
+ (and IntRegs:$src1, (xor IntRegs:$src2, IntRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=xor(Rs,Rt)
+def ORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst |= xor($src2, $src3)",
+ [(set IntRegs:$dst,
+ (or IntRegs:$src1, (xor IntRegs:$src2, IntRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx^=xor(Rs,Rt)
+def XORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst ^= xor($src2, $src3)",
+ [(set IntRegs:$dst,
+ (xor IntRegs:$src1, (xor IntRegs:$src2, IntRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=and(Rs,#s10)
+def ORr_ANDri2_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
+ "$dst |= and($src2, #$src3)",
+ [(set IntRegs:$dst,
+ (or IntRegs:$src1, (and IntRegs:$src2, s10ImmPred:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=or(Rs,#s10)
+def ORr_ORri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
+ "$dst |= or($src2, #$src3)",
+ [(set IntRegs:$dst,
+ (or IntRegs:$src1, (or IntRegs:$src2, s10ImmPred:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Modulo wrap
+// Rd=modwrap(Rs,Rt)
+// Round
+// Rd=cround(Rs,#u5)
+// Rd=cround(Rs,Rt)
+// Rd=round(Rs,#u5)[:sat]
+// Rd=round(Rs,Rt)[:sat]
+// Vector reduce add unsigned halfwords
+// Rd=vraddh(Rss,Rtt)
+// Vector add bytes
+// Rdd=vaddb(Rss,Rtt)
+// Vector conditional negate
+// Rdd=vcnegh(Rss,Rt)
+// Rxx+=vrcnegh(Rss,Rt)
+// Vector maximum bytes
+// Rdd=vmaxb(Rtt,Rss)
+// Vector reduce maximum halfwords
+// Rxx=vrmaxh(Rss,Ru)
+// Rxx=vrmaxuh(Rss,Ru)
+// Vector reduce maximum words
+// Rxx=vrmaxuw(Rss,Ru)
+// Rxx=vrmaxw(Rss,Ru)
+// Vector minimum bytes
+// Rdd=vminb(Rtt,Rss)
+// Vector reduce minimum halfwords
+// Rxx=vrminh(Rss,Ru)
+// Rxx=vrminuh(Rss,Ru)
+// Vector reduce minimum words
+// Rxx=vrminuw(Rss,Ru)
+// Rxx=vrminw(Rss,Ru)
+// Vector subtract bytes
+// Rdd=vsubb(Rss,Rtt)
+
+//===----------------------------------------------------------------------===//
+// XTYPE/ALU -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/MPY +
+//===----------------------------------------------------------------------===//
+
+// Multiply and use lower result.
+// Rd=add(#u6,mpyi(Rs,#U6))
+def ADDi_MPYri_V4 : MInst<(outs IntRegs:$dst),
+ (ins u6Imm:$src1, IntRegs:$src2, u6Imm:$src3),
+ "$dst = add(#$src1, mpyi($src2, #$src3))",
+ [(set IntRegs:$dst,
+ (add (mul IntRegs:$src2, u6ImmPred:$src3), u6ImmPred:$src1))]>,
+ Requires<[HasV4T]>;
+
+// Rd=add(#u6,mpyi(Rs,Rt))
+
+def ADDi_MPYrr_V4 : MInst<(outs IntRegs:$dst),
+ (ins u6Imm:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst = add(#$src1, mpyi($src2, $src3))",
+ [(set IntRegs:$dst,
+ (add (mul IntRegs:$src2, IntRegs:$src3), u6ImmPred:$src1))]>,
+ Requires<[HasV4T]>;
+
+// Rd=add(Ru,mpyi(#u6:2,Rs))
+def ADDr_MPYir_V4 : MInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ "$dst = add($src1, mpyi(#$src2, $src3))",
+ [(set IntRegs:$dst,
+ (add IntRegs:$src1, (mul IntRegs:$src3, u6_2ImmPred:$src2)))]>,
+ Requires<[HasV4T]>;
+
+// Rd=add(Ru,mpyi(Rs,#u6))
+def ADDr_MPYri_V4 : MInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u6Imm:$src3),
+ "$dst = add($src1, mpyi($src2, #$src3))",
+ [(set IntRegs:$dst,
+ (add IntRegs:$src1, (mul IntRegs:$src2, u6ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// Rx=add(Ru,mpyi(Rx,Rs))
+def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst = add($src1, mpyi($src2, $src3))",
+ [(set IntRegs:$dst,
+ (add IntRegs:$src1, (mul IntRegs:$src2, IntRegs:$src3)))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Polynomial multiply words
+// Rdd=pmpyw(Rs,Rt)
+// Rxx^=pmpyw(Rs,Rt)
+
+// Vector reduce multiply word by signed half (32x16)
+// Rdd=vrmpyweh(Rss,Rtt)[:<<1]
+// Rdd=vrmpywoh(Rss,Rtt)[:<<1]
+// Rxx+=vrmpyweh(Rss,Rtt)[:<<1]
+// Rxx+=vrmpywoh(Rss,Rtt)[:<<1]
+
+// Multiply and use upper result
+// Rd=mpy(Rs,Rt.H):<<1:sat
+// Rd=mpy(Rs,Rt.L):<<1:sat
+// Rd=mpy(Rs,Rt):<<1
+// Rd=mpy(Rs,Rt):<<1:sat
+// Rd=mpysu(Rs,Rt)
+// Rx+=mpy(Rs,Rt):<<1:sat
+// Rx-=mpy(Rs,Rt):<<1:sat
+
+// Vector multiply bytes
+// Rdd=vmpybsu(Rs,Rt)
+// Rdd=vmpybu(Rs,Rt)
+// Rxx+=vmpybsu(Rs,Rt)
+// Rxx+=vmpybu(Rs,Rt)
+
+// Vector polynomial multiply halfwords
+// Rdd=vpmpyh(Rs,Rt)
+// Rxx^=vpmpyh(Rs,Rt)
+
+//===----------------------------------------------------------------------===//
+// XTYPE/MPY -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/SHIFT +
+//===----------------------------------------------------------------------===//
+
+// Shift by immediate and accumulate.
+// Rx=add(#u8,asl(Rx,#U5))
+def ADDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = add(#$src1, asl($src2, #$src3))",
+ [(set IntRegs:$dst,
+ (add (shl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx=add(#u8,lsr(Rx,#U5))
+def ADDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = add(#$src1, lsr($src2, #$src3))",
+ [(set IntRegs:$dst,
+ (add (srl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx=sub(#u8,asl(Rx,#U5))
+def SUBi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = sub(#$src1, asl($src2, #$src3))",
+ [(set IntRegs:$dst,
+ (sub u8ImmPred:$src1, (shl IntRegs:$src2, u5ImmPred:$src3)))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx=sub(#u8,lsr(Rx,#U5))
+def SUBi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = sub(#$src1, lsr($src2, #$src3))",
+ [(set IntRegs:$dst,
+ (sub u8ImmPred:$src1, (srl IntRegs:$src2, u5ImmPred:$src3)))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+
+//Shift by immediate and logical.
+//Rx=and(#u8,asl(Rx,#U5))
+def ANDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = and(#$src1, asl($src2, #$src3))",
+ [(set IntRegs:$dst,
+ (and (shl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+//Rx=and(#u8,lsr(Rx,#U5))
+def ANDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = and(#$src1, lsr($src2, #$src3))",
+ [(set IntRegs:$dst,
+ (and (srl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+//Rx=or(#u8,asl(Rx,#U5))
+def ORi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = or(#$src1, asl($src2, #$src3))",
+ [(set IntRegs:$dst,
+ (or (shl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+//Rx=or(#u8,lsr(Rx,#U5))
+def ORi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = or(#$src1, lsr($src2, #$src3))",
+ [(set IntRegs:$dst,
+ (or (srl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+
+//Shift by register.
+//Rd=lsl(#s6,Rt)
+def LSLi_V4 : MInst<(outs IntRegs:$dst), (ins s6Imm:$src1, IntRegs:$src2),
+ "$dst = lsl(#$src1, $src2)",
+ [(set IntRegs:$dst, (shl s6ImmPred:$src1, IntRegs:$src2))]>,
+ Requires<[HasV4T]>;
+
+
+//Shift by register and logical.
+//Rxx^=asl(Rss,Rt)
+def ASLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ "$dst ^= asl($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (xor DoubleRegs:$src1, (shl DoubleRegs:$src2, IntRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+//Rxx^=asr(Rss,Rt)
+def ASRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ "$dst ^= asr($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (xor DoubleRegs:$src1, (sra DoubleRegs:$src2, IntRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+//Rxx^=lsl(Rss,Rt)
+def LSLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ "$dst ^= lsl($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (xor DoubleRegs:$src1, (shl DoubleRegs:$src2, IntRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+//Rxx^=lsr(Rss,Rt)
+def LSRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ "$dst ^= lsr($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (xor DoubleRegs:$src1, (srl DoubleRegs:$src2, IntRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/SHIFT -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MEMOP: Word, Half, Byte
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MEMOP: Word
+//
+// Implemented:
+// MEMw_ADDi_indexed_V4 : memw(Rs+#u6:2)+=#U5
+// MEMw_SUBi_indexed_V4 : memw(Rs+#u6:2)-=#U5
+// MEMw_ADDr_indexed_V4 : memw(Rs+#u6:2)+=Rt
+// MEMw_SUBr_indexed_V4 : memw(Rs+#u6:2)-=Rt
+// MEMw_CLRr_indexed_V4 : memw(Rs+#u6:2)&=Rt
+// MEMw_SETr_indexed_V4 : memw(Rs+#u6:2)|=Rt
+// MEMw_ADDi_V4 : memw(Rs+#u6:2)+=#U5
+// MEMw_SUBi_V4 : memw(Rs+#u6:2)-=#U5
+// MEMw_ADDr_V4 : memw(Rs+#u6:2)+=Rt
+// MEMw_SUBr_V4 : memw(Rs+#u6:2)-=Rt
+// MEMw_CLRr_V4 : memw(Rs+#u6:2)&=Rt
+// MEMw_SETr_V4 : memw(Rs+#u6:2)|=Rt
+//
+// Not implemented:
+// MEMw_CLRi_indexed_V4 : memw(Rs+#u6:2)=clrbit(#U5)
+// MEMw_SETi_indexed_V4 : memw(Rs+#u6:2)=setbit(#U5)
+// MEMw_CLRi_V4 : memw(Rs+#u6:2)=clrbit(#U5)
+// MEMw_SETi_V4 : memw(Rs+#u6:2)=setbit(#U5)
+//===----------------------------------------------------------------------===//
+
+
+// MEMw_ADDSUBi_indexed_V4:
+// Pseudo operation for MEMw_ADDi_indexed_V4 and
+// MEMw_SUBi_indexed_V4; a later pass will change it
+// to the corresponding pattern.
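+// (Intent sketch, not taken from the ISA spec: the DAG only exposes an add of
+// a signed #m6 constant, so a single pseudo is selected here and the later
+// pass is presumably expected to rewrite it as "+= #U5" when the constant is
+// non-negative or as "-= #U5" when it is negative.)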
+let AddedComplexity = 30 in
+def MEMw_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, m6Imm:$addend),
+ "Error; should not emit",
+ [(store (add (load (add IntRegs:$base, u6_2ImmPred:$offset)),
+m6ImmPred:$addend),
+ (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) += #U5
+let AddedComplexity = 30 in
+def MEMw_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$addend),
+ "memw($base+#$offset) += $addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) -= #U5
+let AddedComplexity = 30 in
+def MEMw_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$subend),
+ "memw($base+#$offset) -= $subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) += Rt
+let AddedComplexity = 30 in
+def MEMw_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$addend),
+ "memw($base+#$offset) += $addend",
+ [(store (add (load (add IntRegs:$base, u6_2ImmPred:$offset)),
+IntRegs:$addend),
+ (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) -= Rt
+let AddedComplexity = 30 in
+def MEMw_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$subend),
+ "memw($base+#$offset) -= $subend",
+ [(store (sub (load (add IntRegs:$base, u6_2ImmPred:$offset)),
+IntRegs:$subend),
+ (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) &= Rt
+let AddedComplexity = 30 in
+def MEMw_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$andend),
+ "memw($base+#$offset) += $andend",
+ [(store (and (load (add IntRegs:$base, u6_2ImmPred:$offset)),
+IntRegs:$andend),
+ (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) |= Rt
+let AddedComplexity = 30 in
+def MEMw_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$orend),
+ "memw($base+#$offset) |= $orend",
+ [(store (or (load (add IntRegs:$base, u6_2ImmPred:$offset)),
+ IntRegs:$orend),
+ (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// MEMw_ADDSUBi_V4:
+// Pseudo operation for MEMw_ADDi_V4 and MEMw_SUBi_V4;
+// a later pass will change it to the right pattern.
+let AddedComplexity = 30 in
+def MEMw_ADDSUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, m6Imm:$addend),
+ "Error; should not emit",
+ [(store (add (load ADDRriU6_2:$addr), m6ImmPred:$addend),
+ ADDRriU6_2:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) += #U5
+let AddedComplexity = 30 in
+def MEMw_ADDi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$addend),
+ "memw($addr) += $addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) -= #U5
+let AddedComplexity = 30 in
+def MEMw_SUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$subend),
+ "memw($addr) -= $subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) += Rt
+let AddedComplexity = 30 in
+def MEMw_ADDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$addend),
+ "memw($addr) += $addend",
+ [(store (add (load ADDRriU6_2:$addr), IntRegs:$addend),
+ ADDRriU6_2:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) -= Rt
+let AddedComplexity = 30 in
+def MEMw_SUBr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$subend),
+ "memw($addr) -= $subend",
+ [(store (sub (load ADDRriU6_2:$addr), IntRegs:$subend),
+ ADDRriU6_2:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) &= Rt
+let AddedComplexity = 30 in
+def MEMw_ANDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$andend),
+ "memw($addr) &= $andend",
+ [(store (and (load ADDRriU6_2:$addr), IntRegs:$andend),
+ ADDRriU6_2:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) |= Rt
+let AddedComplexity = 30 in
+def MEMw_ORr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$orend),
+ "memw($addr) |= $orend",
+ [(store (or (load ADDRriU6_2:$addr), IntRegs:$orend),
+ADDRriU6_2:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+//===----------------------------------------------------------------------===//
+// MEMOP: Halfword
+//
+// Implemented:
+// MEMh_ADDi_indexed_V4 : memh(Rs+#u6:1)+=#U5
+// MEMh_SUBi_indexed_V4 : memh(Rs+#u6:1)-=#U5
+// MEMh_ADDr_indexed_V4 : memh(Rs+#u6:1)+=Rt
+// MEMh_SUBr_indexed_V4 : memh(Rs+#u6:1)-=Rt
+// MEMh_CLRr_indexed_V4 : memh(Rs+#u6:1)&=Rt
+// MEMh_SETr_indexed_V4 : memh(Rs+#u6:1)|=Rt
+// MEMh_ADDi_V4 : memh(Rs+#u6:1)+=#U5
+// MEMh_SUBi_V4 : memh(Rs+#u6:1)-=#U5
+// MEMh_ADDr_V4 : memh(Rs+#u6:1)+=Rt
+// MEMh_SUBr_V4 : memh(Rs+#u6:1)-=Rt
+// MEMh_CLRr_V4 : memh(Rs+#u6:1)&=Rt
+// MEMh_SETr_V4 : memh(Rs+#u6:1)|=Rt
+//
+// Not implemented:
+// MEMh_CLRi_indexed_V4 : memh(Rs+#u6:1)=clrbit(#U5)
+// MEMh_SETi_indexed_V4 : memh(Rs+#u6:1)=setbit(#U5)
+// MEMh_CLRi_V4 : memh(Rs+#u6:1)=clrbit(#U5)
+// MEMh_SETi_V4 : memh(Rs+#u6:1)=setbit(#U5)
+//===----------------------------------------------------------------------===//
+
+
+// MEMh_ADDSUBi_indexed_V4:
+// Pseudo operation for MEMh_ADDi_indexed_V4 and
+// MEMh_SUBi_indexed_V4; a later pass will change it
+// to the corresponding pattern.
+let AddedComplexity = 30 in
+def MEMh_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, m6Imm:$addend),
+ "Error; should not emit",
+ [(truncstorei16 (add (sextloadi16 (add IntRegs:$base,
+ u6_1ImmPred:$offset)),
+ m6ImmPred:$addend),
+ (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) += #U5
+let AddedComplexity = 30 in
+def MEMh_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, u5Imm:$addend),
+ "memh($base+#$offset) += $addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) -= #U5
+let AddedComplexity = 30 in
+def MEMh_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, u5Imm:$subend),
+ "memh($base+#$offset) -= $subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) += Rt
+let AddedComplexity = 30 in
+def MEMh_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$addend),
+ "memh($base+#$offset) += $addend",
+ [(truncstorei16 (add (sextloadi16 (add IntRegs:$base,
+ u6_1ImmPred:$offset)),
+ IntRegs:$addend),
+ (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) -= Rt
+let AddedComplexity = 30 in
+def MEMh_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$subend),
+ "memh($base+#$offset) -= $subend",
+ [(truncstorei16 (sub (sextloadi16 (add IntRegs:$base,
+ u6_1ImmPred:$offset)),
+ IntRegs:$subend),
+ (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) &= Rt
+let AddedComplexity = 30 in
+def MEMh_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$andend),
+ "memh($base+#$offset) += $andend",
+ [(truncstorei16 (and (sextloadi16 (add IntRegs:$base,
+ u6_1ImmPred:$offset)),
+ IntRegs:$andend),
+ (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) |= Rt
+let AddedComplexity = 30 in
+def MEMh_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$orend),
+ "memh($base+#$offset) |= $orend",
+ [(truncstorei16 (or (sextloadi16 (add IntRegs:$base,
+ u6_1ImmPred:$offset)),
+ IntRegs:$orend),
+ (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// MEMh_ADDSUBi_V4:
+// Pseudo operation for MEMh_ADDi_V4 and MEMh_SUBi_V4;
+// a later pass will change it to the right pattern.
+let AddedComplexity = 30 in
+def MEMh_ADDSUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, m6Imm:$addend),
+ "Error; should not emit",
+ [(truncstorei16 (add (sextloadi16 ADDRriU6_1:$addr),
+ m6ImmPred:$addend), ADDRriU6_1:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) += #U5
+let AddedComplexity = 30 in
+def MEMh_ADDi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$addend),
+ "memh($addr) += $addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) -= #U5
+let AddedComplexity = 30 in
+def MEMh_SUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$subend),
+ "memh($addr) -= $subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) += Rt
+let AddedComplexity = 30 in
+def MEMh_ADDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$addend),
+ "memh($addr) += $addend",
+ [(truncstorei16 (add (sextloadi16 ADDRriU6_1:$addr),
+ IntRegs:$addend), ADDRriU6_1:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) -= Rt
+let AddedComplexity = 30 in
+def MEMh_SUBr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$subend),
+ "memh($addr) -= $subend",
+ [(truncstorei16 (sub (sextloadi16 ADDRriU6_1:$addr),
+ IntRegs:$subend), ADDRriU6_1:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) &= Rt
+let AddedComplexity = 30 in
+def MEMh_ANDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$andend),
+ "memh($addr) &= $andend",
+ [(truncstorei16 (and (sextloadi16 ADDRriU6_1:$addr),
+ IntRegs:$andend), ADDRriU6_1:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) |= Rt
+let AddedComplexity = 30 in
+def MEMh_ORr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$orend),
+ "memh($addr) |= $orend",
+ [(truncstorei16 (or (sextloadi16 ADDRriU6_1:$addr),
+ IntRegs:$orend), ADDRriU6_1:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+
+//===----------------------------------------------------------------------===//
+// MEMOP: Byte
+//
+// Implemented:
+// MEMb_ADDi_indexed_V4 : memb(Rs+#u6:0)+=#U5
+// MEMb_SUBi_indexed_V4 : memb(Rs+#u6:0)-=#U5
+// MEMb_ADDr_indexed_V4 : memb(Rs+#u6:0)+=Rt
+// MEMb_SUBr_indexed_V4 : memb(Rs+#u6:0)-=Rt
+// MEMb_CLRr_indexed_V4 : memb(Rs+#u6:0)&=Rt
+// MEMb_SETr_indexed_V4 : memb(Rs+#u6:0)|=Rt
+// MEMb_ADDi_V4 : memb(Rs+#u6:0)+=#U5
+// MEMb_SUBi_V4 : memb(Rs+#u6:0)-=#U5
+// MEMb_ADDr_V4 : memb(Rs+#u6:0)+=Rt
+// MEMb_SUBr_V4 : memb(Rs+#u6:0)-=Rt
+// MEMb_CLRr_V4 : memb(Rs+#u6:0)&=Rt
+// MEMb_SETr_V4 : memb(Rs+#u6:0)|=Rt
+//
+// Not implemented:
+// MEMb_CLRi_indexed_V4 : memb(Rs+#u6:0)=clrbit(#U5)
+// MEMb_SETi_indexed_V4 : memb(Rs+#u6:0)=setbit(#U5)
+// MEMb_CLRi_V4 : memb(Rs+#u6:0)=clrbit(#U5)
+// MEMb_SETi_V4 : memb(Rs+#u6:0)=setbit(#U5)
+//===----------------------------------------------------------------------===//
+
+
+// MEMb_ADDSUBi_indexed_V4:
+// Pseudo operation for MEMb_ADDi_indexed_V4 and
+// MEMb_SUBi_indexed_V4; a later pass will change it
+// to the corresponding pattern.
+let AddedComplexity = 30 in
+def MEMb_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, m6Imm:$addend),
+ "Error; should not emit",
+ [(truncstorei8 (add (sextloadi8 (add IntRegs:$base,
+ u6_0ImmPred:$offset)),
+ m6ImmPred:$addend),
+ (add IntRegs:$base, u6_0ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) += #U5
+let AddedComplexity = 30 in
+def MEMb_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, u5Imm:$addend),
+ "memb($base+#$offset) += $addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) -= #U5
+let AddedComplexity = 30 in
+def MEMb_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, u5Imm:$subend),
+ "memb($base+#$offset) -= $subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) += Rt
+let AddedComplexity = 30 in
+def MEMb_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$addend),
+ "memb($base+#$offset) += $addend",
+ [(truncstorei8 (add (sextloadi8 (add IntRegs:$base,
+ u6_0ImmPred:$offset)),
+ IntRegs:$addend),
+ (add IntRegs:$base, u6_0ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) -= Rt
+let AddedComplexity = 30 in
+def MEMb_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$subend),
+ "memb($base+#$offset) -= $subend",
+ [(truncstorei8 (sub (sextloadi8 (add IntRegs:$base,
+ u6_0ImmPred:$offset)),
+ IntRegs:$subend),
+ (add IntRegs:$base, u6_0ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) &= Rt
+let AddedComplexity = 30 in
+def MEMb_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$andend),
+ "memb($base+#$offset) += $andend",
+ [(truncstorei8 (and (sextloadi8 (add IntRegs:$base,
+ u6_0ImmPred:$offset)),
+ IntRegs:$andend),
+ (add IntRegs:$base, u6_0ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) |= Rt
+let AddedComplexity = 30 in
+def MEMb_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$orend),
+ "memb($base+#$offset) |= $orend",
+ [(truncstorei8 (or (sextloadi8 (add IntRegs:$base,
+ u6_0ImmPred:$offset)),
+ IntRegs:$orend),
+ (add IntRegs:$base, u6_0ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// MEMb_ADDSUBi_V4:
+// Pseudo operation for MEMb_ADDi_V4 and MEMb_SUBi_V4;
+// a later pass will change it to the right pattern.
+let AddedComplexity = 30 in
+def MEMb_ADDSUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, m6Imm:$addend),
+ "Error; should not emit",
+ [(truncstorei8 (add (sextloadi8 ADDRriU6_0:$addr),
+ m6ImmPred:$addend), ADDRriU6_0:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) += #U5
+let AddedComplexity = 30 in
+def MEMb_ADDi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$addend),
+ "memb($addr) += $addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) -= #U5
+let AddedComplexity = 30 in
+def MEMb_SUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$subend),
+ "memb($addr) -= $subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) += Rt
+let AddedComplexity = 30 in
+def MEMb_ADDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$addend),
+ "memb($addr) += $addend",
+ [(truncstorei8 (add (sextloadi8 ADDRriU6_0:$addr),
+ IntRegs:$addend), ADDRriU6_0:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) -= Rt
+let AddedComplexity = 30 in
+def MEMb_SUBr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$subend),
+ "memb($addr) -= $subend",
+ [(truncstorei8 (sub (sextloadi8 ADDRriU6_0:$addr),
+ IntRegs:$subend), ADDRriU6_0:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) &= Rt
+let AddedComplexity = 30 in
+def MEMb_ANDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$andend),
+ "memb($addr) &= $andend",
+ [(truncstorei8 (and (sextloadi8 ADDRriU6_0:$addr),
+ IntRegs:$andend), ADDRriU6_0:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) |= Rt
+let AddedComplexity = 30 in
+def MEMb_ORr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$orend),
+ "memb($addr) |= $orend",
+ [(truncstorei8 (or (sextloadi8 ADDRriU6_0:$addr),
+ IntRegs:$orend), ADDRriU6_0:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/PRED +
+//===----------------------------------------------------------------------===//
+
+// Hexagon V4 only supports these flavors of byte/half compare instructions:
+// EQ/GT/GTU. Other flavors such as GE/GEU/LT/LTU/LE/LEU are not supported by
+// the hardware. However, the compiler can still implement them by combining
+// the patterns that are implemented.
+// Implemented patterns: EQ/GT/GTU.
+// Missing patterns: GE/GEU/LT/LTU/LE/LEU.
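+//
+// For example (a sketch only; no selection patterns for the missing flavors
+// are defined here), a "less than" compare can reuse the GT flavor with the
+// operands swapped, and "less than or equal" can use the complement of GT:
+//   Rs <  Rt  ==>  Pd = cmpb.gt(Rt, Rs)
+//   Rs <= Rt  ==>  Pd = cmpb.gt(Rs, Rt), then use !Pd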
+
+// Pd=cmpb.eq(Rs,#u8)
+let isCompare = 1 in
+def CMPbEQri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, u8Imm:$src2),
+ "$dst = cmpb.eq($src1, #$src2)",
+ [(set PredRegs:$dst, (seteq (and IntRegs:$src1, 255),
+ u8ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.eq(Rs,Rt)
+let isCompare = 1 in
+def CMPbEQrr_ubub_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmpb.eq($src1, $src2)",
+ [(set PredRegs:$dst, (seteq (and (xor IntRegs:$src1,
+ IntRegs:$src2),
+ 255),
+ 0))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.eq(Rs,Rt)
+let isCompare = 1 in
+def CMPbEQrr_sbsb_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmpb.eq($src1, $src2)",
+ [(set PredRegs:$dst, (seteq (shl IntRegs:$src1, (i32 24)),
+ (shl IntRegs:$src2, (i32 24))))]>,
+ Requires<[HasV4T]>;
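+// Note on the pattern above: shifting both operands left by 24 discards the
+// upper bits, so the 32-bit equality test only observes the low bytes; the
+// halfword compares below use the same trick with a shift of 16.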
+
+// Pd=cmpb.gt(Rs,#s8)
+let isCompare = 1 in
+def CMPbGTri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, s32Imm:$src2),
+ "$dst = cmpb.gt($src1, #$src2)",
+ [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 24)),
+ s32_24ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.gt(Rs,Rt)
+let isCompare = 1 in
+def CMPbGTrr_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmpb.gt($src1, $src2)",
+ [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 24)),
+ (shl IntRegs:$src2, (i32 24))))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.gtu(Rs,#u7)
+let isCompare = 1 in
+def CMPbGTUri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, u7Imm:$src2),
+ "$dst = cmpb.gtu($src1, #$src2)",
+ [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 255),
+ u7ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.gtu(Rs,Rt)
+let isCompare = 1 in
+def CMPbGTUrr_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmpb.gtu($src1, $src2)",
+ [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 255),
+ (and IntRegs:$src2, 255)))]>,
+ Requires<[HasV4T]>;
+
+// Signed half compare(.eq) ri.
+// Pd=cmph.eq(Rs,#s8)
+let isCompare = 1 in
+def CMPhEQri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, u16Imm:$src2),
+ "$dst = cmph.eq($src1, #$src2)",
+ [(set PredRegs:$dst, (seteq (and IntRegs:$src1, 65535),
+ u16_s8ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// Signed half compare(.eq) rr.
+// Case 1: xor + and, then compare:
+// r0=xor(r0,r1)
+// r0=and(r0,#0xffff)
+// p0=cmp.eq(r0,#0)
+// Pd=cmph.eq(Rs,Rt)
+let isCompare = 1 in
+def CMPhEQrr_xor_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmph.eq($src1, $src2)",
+ [(set PredRegs:$dst, (seteq (and (xor IntRegs:$src1,
+ IntRegs:$src2),
+ 65535),
+ 0))]>,
+ Requires<[HasV4T]>;
+
+// Signed half compare(.eq) rr.
+// Case 2: shift left 16 bits then compare:
+// r0=asl(r0,16)
+// r1=asl(r1,16)
+// p0=cmp.eq(r0,r1)
+// Pd=cmph.eq(Rs,Rt)
+let isCompare = 1 in
+def CMPhEQrr_shl_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmph.eq($src1, $src2)",
+ [(set PredRegs:$dst, (seteq (shl IntRegs:$src1, (i32 16)),
+ (shl IntRegs:$src2, (i32 16))))]>,
+ Requires<[HasV4T]>;
+
+// Signed half compare(.gt) ri.
+// Pd=cmph.gt(Rs,#s8)
+let isCompare = 1 in
+def CMPhGTri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, s32Imm:$src2),
+ "$dst = cmph.gt($src1, #$src2)",
+ [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 16)),
+ s32_16s8ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// Signed half compare(.gt) rr.
+// Pd=cmph.gt(Rs,Rt)
+let isCompare = 1 in
+def CMPhGTrr_shl_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmph.gt($src1, $src2)",
+ [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 16)),
+ (shl IntRegs:$src2, (i32 16))))]>,
+ Requires<[HasV4T]>;
+
+// Unsigned half compare rr (.gtu).
+// Pd=cmph.gtu(Rs,Rt)
+let isCompare = 1 in
+def CMPhGTUrr_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmph.gtu($src1, $src2)",
+ [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 65535),
+ (and IntRegs:$src2, 65535)))]>,
+ Requires<[HasV4T]>;
+
+// Unsigned half compare ri (.gtu).
+// Pd=cmph.gtu(Rs,#u7)
+let isCompare = 1 in
+def CMPhGTUri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, u7Imm:$src2),
+ "$dst = cmph.gtu($src1, #$src2)",
+ [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 65535),
+ u7ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+//===----------------------------------------------------------------------===//
+// XTYPE/PRED -
+//===----------------------------------------------------------------------===//
+
+//Deallocate frame and return.
+// dealloc_return
+let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicable = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ def DEALLOC_RET_V4 : NVInst_V4<(outs), (ins i32imm:$amt1),
+ "dealloc_return",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (Ps) dealloc_return
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ def DEALLOC_RET_cPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, i32imm:$amt1),
+ "if ($src1) dealloc_return",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (!Ps) dealloc_return
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ def DEALLOC_RET_cNotPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if (!$src1) dealloc_return",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (Ps.new) dealloc_return:nt
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ def DEALLOC_RET_cdnPnt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if ($src1.new) dealloc_return:nt",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (!Ps.new) dealloc_return:nt
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ def DEALLOC_RET_cNotdnPnt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if (!$src1.new) dealloc_return:nt",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (Ps.new) dealloc_return:t
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ def DEALLOC_RET_cdnPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if ($src1.new) dealloc_return:t",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (!Ps.new) dealloc_return:t
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ def DEALLOC_RET_cNotdnPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if (!$src1.new) dealloc_return:t",
+ []>,
+ Requires<[HasV4T]>;
+}
diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td
new file mode 100644
index 0000000..1328eba
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -0,0 +1,3462 @@
+//===- HexagonIntrinsics.td - Instruction intrinsics -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is populated based on the following specs:
+// Hexagon V2 Architecture
+// Application-Level Specification
+// 80-V9418-8 Rev. B
+// March 4, 2008
+//===----------------------------------------------------------------------===//
+
+//
+// ALU 32 types.
+//
+
+class qi_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
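+// For illustration only (the concrete defs are expected further down in this
+// file; the names below are an assumed example, not a definition made here),
+// a class like the one above would be instantiated along the lines of
+//   def Hexagon_C2_cmpeq : qi_ALU32_sisi<"cmp.eq", int_hexagon_C2_cmpeq>;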
+
+class qi_ALU32_sis10<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_ALU32_sis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_ALU32_siu8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_ALU32_siu9<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u9Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_ALU32_qisisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_ALU32_qis8si<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2,
+ IntRegs:$src3))]>;
+
+class si_ALU32_qisis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ s8Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ imm:$src3))]>;
+
+class si_ALU32_qis8s8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2, s8Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, imm:$src3))]>;
+
+class si_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU32_sisi_sat<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU32_sisi_rnd<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU32_sis16<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s16Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_ALU32_sis10<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_ALU32_s10si<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins s10Imm:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "(#$src1, $src2)")),
+ [(set IntRegs:$dst, (IntID imm:$src1, IntRegs:$src2))]>;
+
+class si_lo_ALU32_siu16<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u16Imm:$src2),
+ !strconcat("$dst.l = ", !strconcat(opc , "#$src2")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_hi_ALU32_siu16<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u16Imm:$src2),
+ !strconcat("$dst.h = ", !strconcat(opc , "#$src2")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_ALU32_s16<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins s16Imm:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "#$src1")),
+ [(set IntRegs:$dst, (IntID imm:$src1))]>;
+
+class di_ALU32_s8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs DoubleRegs:$dst), (ins s8Imm:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "#$src1")),
+ [(set DoubleRegs:$dst, (IntID imm:$src1))]>;
+
+class di_ALU64_di<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "$src")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+class si_ALU32_si<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_ALU32_si_tfr<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "$src")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+//
+// ALU 64 types.
+//
+
+class si_ALU64_si_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_ALU64_didi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class di_ALU64_sidi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2))]>;
+
+class di_ALU64_didi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_ALU64_qididi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2,
+ DoubleRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2,
+ DoubleRegs:$src3))]>;
+
+class di_ALU64_sisi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_ALU64_didi_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_ALU64_didi_rnd<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_ALU64_didi_crnd<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):crnd")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_ALU64_didi_rnd_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_ALU64_didi_crnd_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):crnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class qi_ALU64_didi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class si_ALU64_sisi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_sat_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_sat_hh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_sat_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_sat_hl<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_sat_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_hh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_hl<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_sat_hh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):sat:<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_sat_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.H):sat:<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_sat_hl<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.L):sat:<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_sat_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.L):sat:<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_hh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_hl<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
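+// Illustrative usage (editor's sketch, not part of this patch): each of the
+// ALU64 wrapper classes above is instantiated once per intrinsic, pairing the
+// printed mnemonic with the matching llvm.hexagon.* intrinsic record, along
+// the lines of
+//
+//   def Hexagon_A2_addsat : si_ALU64_sisi_sat<"add", int_hexagon_A2_addsat>;
+//
+// (The def and intrinsic names above are assumed for illustration.)
+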
+//
+// SInst classes.
+//
+
+class qi_SInst_qi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src))]>;
+
+class qi_SInst_qi_pxfer<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "$src")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src))]>;
+
+class qi_SInst_qiqi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_SInst_qiqi_neg<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, !$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_SInst_di<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+class di_SInst_di_sat<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+class si_SInst_di<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+class si_SInst_di_sat<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+class di_SInst_disi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
+
+class di_SInst_didi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class di_SInst_si<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>;
+
+class si_SInst_sisiu3<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, u3Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ imm:$src3))]>;
+
+class si_SInst_diu5<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
+
+class si_SInst_disi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
+
+class si_SInst_sidi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2))]>;
+
+class di_SInst_disisi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class di_SInst_sisi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_SInst_siu5<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_SInst_siu6<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u6Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
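+// Editor's note (sketch, not part of this patch): the u3Imm/u5Imm/u6Imm
+// operand classes only bound how wide an immediate the assembler and the
+// pattern matcher accept; the selection pattern itself still matches the
+// generic 'imm' node, and the literal '#' in the format string is simply
+// Hexagon assembly syntax for an immediate operand, e.g.
+//
+//   def Hexagon_C2_bitsclri
+//     : qi_SInst_siu6<"bitsclr", int_hexagon_C2_bitsclri>;
+//
+// (Def and intrinsic names are illustrative only.)
+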
+class qi_SInst_sisi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_SInst_si<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_SInst_si_sat<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+class di_SInst_qi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_SInst_qi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "$src")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_SInst_qiqi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_SInst_si<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "$src")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_SInst_sisi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_SInst_diu6<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
+
+class si_SInst_siu5<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_SInst_siu5_rnd<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_SInst_siu5u5<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2, u5Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, imm:$src3))]>;
+
+class si_SInst_sisisi_acc<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisisi_nac<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didisi_acc<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didisi_nac<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1, IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisiu5u5<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2, u5Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, #$src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2, imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisidi<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6u6<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u6Imm:$src2, u6Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, #$src2, #$src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ imm:$src2, imm:$src3))],
+ "$dst2 = $dst">;
+
+class di_SInst_dididi<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_diu6u6<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2,
+ u6Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2,
+ imm:$src3))]>;
+
+class di_SInst_didisi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3))]>;
+
+class di_SInst_didiqi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3))]>;
+
+class di_SInst_didiu3<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
+ u3Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2,
+ imm:$src3))]>;
+
+class di_SInst_didisi_or<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst |= ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didisi_and<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst &= ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6_and<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u6Imm:$src2),
+ !strconcat("$dst &= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6_or<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u6Imm:$src2),
+ !strconcat("$dst |= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6_xor<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u6Imm:$src2),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisisi_and<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst &= ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisisi_or<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst |= ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisiu5_and<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst &= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisiu5_or<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst |= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisiu5_xor<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisiu5_acc<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisiu5_nac<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6_acc<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u6Imm:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1, imm:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6_nac<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u6Imm:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+//
+// MInst classes.
+//
+
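+// Editor's note (sketch, not part of this patch): the MInst wrappers below
+// follow the same naming scheme as the ALU64/SInst classes.  Suffixes in a
+// class name map one-to-one onto the printed modifiers: hh/hl/lh/ll select
+// the .H/.L halves of the sources, s1 appends ":<<1", rnd appends ":rnd",
+// sat appends ":sat", and conj marks the second source with '*'.  An assumed
+// instantiation would be
+//
+//   def Hexagon_M2_mpyd_rnd_hh_s1
+//     : di_MInst_sisi_rnd_hh_s1<"mpy", int_hexagon_M2_mpyd_rnd_hh_s1>;
+//
+// (Def and intrinsic names are illustrative only.)
+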
+class di_MInst_sisi_rnd_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_hh<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1:rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_hl<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.L):rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1:rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_lh<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.H):rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1:rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_ll<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.L):rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_disisi_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_sat_conj<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2*):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_sat_conj<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2*):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_s1_sat_conj<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1, $src2*):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_s1_sat_conj<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1, $src2*):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_s8s8<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins s8Imm:$src1, s8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "(#$src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID imm:$src1, imm:$src2))]>;
+
+class si_MInst_sisi<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_hh<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_lh<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_hl<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_ll<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_hh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_lh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_hl<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_ll<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_up<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_didi<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_MInst_didi_conj<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2*)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_MInst_sisi_s1_sat_conj<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2*):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_didi_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):<<1:rnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_MInst_didi_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_MInst_didi_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):rnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class si_SInst_sisi_sat<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_l_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2.L):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_h_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2.H):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_sat_conj<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2*):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_s1_rnd_sat_conj<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2*):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisisi_xacc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst += ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst -= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisis8_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ s8Imm:$src3),
+ !strconcat("$dst += ", !strconcat(opc , "($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisis8_nac<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ s8Imm:$src3),
+ !strconcat("$dst -= ", !strconcat(opc , "($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisiu4u5<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u4Imm:$src2, u5Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, #$src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2, imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisiu8_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ u8Imm:$src3),
+ !strconcat("$dst += ", !strconcat(opc , "($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisiu8_nac<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ u8Imm:$src3),
+ !strconcat("$dst -= ", !strconcat(opc , "($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hh_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hh_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hl_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hl_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_lh_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_lh_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_ll_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_ll_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_sat_conj<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2*):sat")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_s1_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_didi_s1_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class si_MInst_didi_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class si_MInst_didi_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd:sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class si_MInst_sisi_sat_hh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_hl<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_lh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_ll<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_hh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_hh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_hl<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.H, $src2.L):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.H, $src2.L):<<1:rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_hl<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.H, $src2.L):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.H, $src2.L):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_lh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.H):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_lh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.H):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.H):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.H):<<1:rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_ll<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.L):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.L):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_ll<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.L):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.L):<<1:rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_dididi_acc_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2,
+ DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_dididi_acc_rnd_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):rnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_dididi_acc_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_dididi_acc_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):<<1:rnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_dididi_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_dididi_acc_conj<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2*)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1.H, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1.H, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1.L, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1.L, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ",
+ !strconcat(opc , "($src1.H, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ",
+ !strconcat(opc , "($src1.H, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ",
+ !strconcat(opc , "($src1.L, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ",
+ !strconcat(opc , "($src1.L, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disi_s1_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_didisi_acc_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_disi_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1, $src2):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_didi<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+
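+// A note on the naming convention used by the wrapper classes above and the
+// defs below: the leading si_/di_/qi_ gives the result register class
+// (32-bit IntRegs, 64-bit DoubleRegs, or a predicate, as in the compares),
+// the next token names the instruction type (ALU32, ALU64, MInst, SInst),
+// and the operand string (sisi, didi, disisi, sis16, siu5, ...) lists the
+// source register classes and immediates in order.  Trailing suffixes mirror
+// the assembler modifiers: _hh/_hl/_lh/_ll select halfwords, _rnd/_sat/_crnd
+// append :rnd/:sat/:crnd, _s1 appends :<<1, and _acc/_nac accumulate into
+// the destination with += / -=.
+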
+/********************************************************************
+* ALU32/ALU *
+*********************************************************************/
+
+// ALU32 / ALU / Add.
+def Hexagon_A2_add:
+ si_ALU32_sisi <"add", int_hexagon_A2_add>;
+def Hexagon_A2_addi:
+ si_ALU32_sis16 <"add", int_hexagon_A2_addi>;
+
+// ALU32 / ALU / Logical operations.
+def Hexagon_A2_and:
+ si_ALU32_sisi <"and", int_hexagon_A2_and>;
+def Hexagon_A2_andir:
+ si_ALU32_sis10 <"and", int_hexagon_A2_andir>;
+def Hexagon_A2_not:
+ si_ALU32_si <"not", int_hexagon_A2_not>;
+def Hexagon_A2_or:
+ si_ALU32_sisi <"or", int_hexagon_A2_or>;
+def Hexagon_A2_orir:
+ si_ALU32_sis10 <"or", int_hexagon_A2_orir>;
+def Hexagon_A2_xor:
+ si_ALU32_sisi <"xor", int_hexagon_A2_xor>;
+
+// ALU32 / ALU / Negate.
+def Hexagon_A2_neg:
+ si_ALU32_si <"neg", int_hexagon_A2_neg>;
+
+// ALU32 / ALU / Subtract.
+def Hexagon_A2_sub:
+ si_ALU32_sisi <"sub", int_hexagon_A2_sub>;
+def Hexagon_A2_subri:
+ si_ALU32_s10si <"sub", int_hexagon_A2_subri>;
+
+// ALU32 / ALU / Transfer Immediate.
+def Hexagon_A2_tfril:
+ si_lo_ALU32_siu16 <"", int_hexagon_A2_tfril>;
+def Hexagon_A2_tfrih:
+ si_hi_ALU32_siu16 <"", int_hexagon_A2_tfrih>;
+def Hexagon_A2_tfrsi:
+ si_ALU32_s16 <"", int_hexagon_A2_tfrsi>;
+def Hexagon_A2_tfrpi:
+ di_ALU32_s8 <"", int_hexagon_A2_tfrpi>;
+
+// ALU32 / ALU / Transfer Register.
+def Hexagon_A2_tfr:
+ si_ALU32_si_tfr <"", int_hexagon_A2_tfr>;
+
+/********************************************************************
+* ALU32/PERM *
+*********************************************************************/
+
+// ALU32 / PERM / Combine.
+def Hexagon_A2_combinew:
+ di_ALU32_sisi <"combine", int_hexagon_A2_combinew>;
+def Hexagon_A2_combine_hh:
+ si_MInst_sisi_hh <"combine", int_hexagon_A2_combine_hh>;
+def Hexagon_A2_combine_lh:
+ si_MInst_sisi_lh <"combine", int_hexagon_A2_combine_lh>;
+def Hexagon_A2_combine_hl:
+ si_MInst_sisi_hl <"combine", int_hexagon_A2_combine_hl>;
+def Hexagon_A2_combine_ll:
+ si_MInst_sisi_ll <"combine", int_hexagon_A2_combine_ll>;
+def Hexagon_A2_combineii:
+ di_MInst_s8s8 <"combine", int_hexagon_A2_combineii>;
+
+// ALU32 / PERM / Mux.
+def Hexagon_C2_mux:
+ si_ALU32_qisisi <"mux", int_hexagon_C2_mux>;
+def Hexagon_C2_muxri:
+ si_ALU32_qis8si <"mux", int_hexagon_C2_muxri>;
+def Hexagon_C2_muxir:
+ si_ALU32_qisis8 <"mux", int_hexagon_C2_muxir>;
+def Hexagon_C2_muxii:
+ si_ALU32_qis8s8 <"mux", int_hexagon_C2_muxii>;
+
+// ALU32 / PERM / Shift halfword.
+def Hexagon_A2_aslh:
+ si_ALU32_si <"aslh", int_hexagon_A2_aslh>;
+def Hexagon_A2_asrh:
+ si_ALU32_si <"asrh", int_hexagon_A2_asrh>;
+def SI_to_SXTHI_asrh:
+ si_ALU32_si <"asrh", int_hexagon_SI_to_SXTHI_asrh>;
+
+// ALU32 / PERM / Sign/zero extend.
+def Hexagon_A2_sxth:
+ si_ALU32_si <"sxth", int_hexagon_A2_sxth>;
+def Hexagon_A2_sxtb:
+ si_ALU32_si <"sxtb", int_hexagon_A2_sxtb>;
+def Hexagon_A2_zxth:
+ si_ALU32_si <"zxth", int_hexagon_A2_zxth>;
+def Hexagon_A2_zxtb:
+ si_ALU32_si <"zxtb", int_hexagon_A2_zxtb>;
+
+/********************************************************************
+* ALU32/PRED *
+*********************************************************************/
+
+// ALU32 / PRED / Compare.
+def Hexagon_C2_cmpeq:
+ qi_ALU32_sisi <"cmp.eq", int_hexagon_C2_cmpeq>;
+def Hexagon_C2_cmpeqi:
+ qi_ALU32_sis10 <"cmp.eq", int_hexagon_C2_cmpeqi>;
+def Hexagon_C2_cmpgei:
+ qi_ALU32_sis8 <"cmp.ge", int_hexagon_C2_cmpgei>;
+def Hexagon_C2_cmpgeui:
+ qi_ALU32_siu8 <"cmp.geu", int_hexagon_C2_cmpgeui>;
+def Hexagon_C2_cmpgt:
+ qi_ALU32_sisi <"cmp.gt", int_hexagon_C2_cmpgt>;
+def Hexagon_C2_cmpgti:
+ qi_ALU32_sis10 <"cmp.gt", int_hexagon_C2_cmpgti>;
+def Hexagon_C2_cmpgtu:
+ qi_ALU32_sisi <"cmp.gtu", int_hexagon_C2_cmpgtu>;
+def Hexagon_C2_cmpgtui:
+ qi_ALU32_siu9 <"cmp.gtu", int_hexagon_C2_cmpgtui>;
+def Hexagon_C2_cmplt:
+ qi_ALU32_sisi <"cmp.lt", int_hexagon_C2_cmplt>;
+def Hexagon_C2_cmpltu:
+ qi_ALU32_sisi <"cmp.ltu", int_hexagon_C2_cmpltu>;
+
+/********************************************************************
+* ALU32/VH *
+*********************************************************************/
+
+// ALU32 / VH / Vector add halfwords.
+// Rd32=vadd[u]h(Rs32,Rt32)[:sat]
+def Hexagon_A2_svaddh:
+ si_ALU32_sisi <"vaddh", int_hexagon_A2_svaddh>;
+def Hexagon_A2_svaddhs:
+ si_ALU32_sisi_sat <"vaddh", int_hexagon_A2_svaddhs>;
+def Hexagon_A2_svadduhs:
+ si_ALU32_sisi_sat <"vadduh", int_hexagon_A2_svadduhs>;
+
+// ALU32 / VH / Vector average halfwords.
+def Hexagon_A2_svavgh:
+ si_ALU32_sisi <"vavgh", int_hexagon_A2_svavgh>;
+def Hexagon_A2_svavghs:
+ si_ALU32_sisi_rnd <"vavgh", int_hexagon_A2_svavghs>;
+def Hexagon_A2_svnavgh:
+ si_ALU32_sisi <"vnavgh", int_hexagon_A2_svnavgh>;
+
+// ALU32 / VH / Vector subtract halfwords.
+def Hexagon_A2_svsubh:
+ si_ALU32_sisi <"vsubh", int_hexagon_A2_svsubh>;
+def Hexagon_A2_svsubhs:
+ si_ALU32_sisi_sat <"vsubh", int_hexagon_A2_svsubhs>;
+def Hexagon_A2_svsubuhs:
+ si_ALU32_sisi_sat <"vsubuh", int_hexagon_A2_svsubuhs>;
+
+/********************************************************************
+* ALU64/ALU *
+*********************************************************************/
+
+// ALU64 / ALU / Add.
+def Hexagon_A2_addp:
+ di_ALU64_didi <"add", int_hexagon_A2_addp>;
+def Hexagon_A2_addsat:
+ si_ALU64_sisi_sat <"add", int_hexagon_A2_addsat>;
+
+// ALU64 / ALU / Add halfword.
+// Even though the definition says hl, it should be lh, so DON'T change
+// the class si_ALU64_sisi_l16_lh that it inherits.
+def Hexagon_A2_addh_l16_hl:
+ si_ALU64_sisi_l16_lh <"add", int_hexagon_A2_addh_l16_hl>;
+def Hexagon_A2_addh_l16_ll:
+ si_ALU64_sisi_l16_ll <"add", int_hexagon_A2_addh_l16_ll>;
+
+def Hexagon_A2_addh_l16_sat_hl:
+ si_ALU64_sisi_l16_sat_lh <"add", int_hexagon_A2_addh_l16_sat_hl>;
+def Hexagon_A2_addh_l16_sat_ll:
+ si_ALU64_sisi_l16_sat_ll <"add", int_hexagon_A2_addh_l16_sat_ll>;
+
+def Hexagon_A2_addh_h16_hh:
+ si_ALU64_sisi_h16_hh <"add", int_hexagon_A2_addh_h16_hh>;
+def Hexagon_A2_addh_h16_hl:
+ si_ALU64_sisi_h16_hl <"add", int_hexagon_A2_addh_h16_hl>;
+def Hexagon_A2_addh_h16_lh:
+ si_ALU64_sisi_h16_lh <"add", int_hexagon_A2_addh_h16_lh>;
+def Hexagon_A2_addh_h16_ll:
+ si_ALU64_sisi_h16_ll <"add", int_hexagon_A2_addh_h16_ll>;
+
+def Hexagon_A2_addh_h16_sat_hh:
+ si_ALU64_sisi_h16_sat_hh <"add", int_hexagon_A2_addh_h16_sat_hh>;
+def Hexagon_A2_addh_h16_sat_hl:
+ si_ALU64_sisi_h16_sat_hl <"add", int_hexagon_A2_addh_h16_sat_hl>;
+def Hexagon_A2_addh_h16_sat_lh:
+ si_ALU64_sisi_h16_sat_lh <"add", int_hexagon_A2_addh_h16_sat_lh>;
+def Hexagon_A2_addh_h16_sat_ll:
+ si_ALU64_sisi_h16_sat_ll <"add", int_hexagon_A2_addh_h16_sat_ll>;
+
+// ALU64 / ALU / Compare.
+def Hexagon_C2_cmpeqp:
+ qi_ALU64_didi <"cmp.eq", int_hexagon_C2_cmpeqp>;
+def Hexagon_C2_cmpgtp:
+ qi_ALU64_didi <"cmp.gt", int_hexagon_C2_cmpgtp>;
+def Hexagon_C2_cmpgtup:
+ qi_ALU64_didi <"cmp.gtu", int_hexagon_C2_cmpgtup>;
+
+// ALU64 / ALU / Logical operations.
+def Hexagon_A2_andp:
+ di_ALU64_didi <"and", int_hexagon_A2_andp>;
+def Hexagon_A2_orp:
+ di_ALU64_didi <"or", int_hexagon_A2_orp>;
+def Hexagon_A2_xorp:
+ di_ALU64_didi <"xor", int_hexagon_A2_xorp>;
+
+// ALU64 / ALU / Maximum.
+def Hexagon_A2_max:
+ si_ALU64_sisi <"max", int_hexagon_A2_max>;
+def Hexagon_A2_maxu:
+ si_ALU64_sisi <"maxu", int_hexagon_A2_maxu>;
+
+// ALU64 / ALU / Minimum.
+def Hexagon_A2_min:
+ si_ALU64_sisi <"min", int_hexagon_A2_min>;
+def Hexagon_A2_minu:
+ si_ALU64_sisi <"minu", int_hexagon_A2_minu>;
+
+// ALU64 / ALU / Subtract.
+def Hexagon_A2_subp:
+ di_ALU64_didi <"sub", int_hexagon_A2_subp>;
+def Hexagon_A2_subsat:
+ si_ALU64_sisi_sat <"sub", int_hexagon_A2_subsat>;
+
+// ALU64 / ALU / Subtract halfword.
+// Even though the definition says hl, it should be lh, so DON'T change
+// the class si_ALU64_sisi_l16_lh that it inherits.
+def Hexagon_A2_subh_l16_hl:
+ si_ALU64_sisi_l16_lh <"sub", int_hexagon_A2_subh_l16_hl>;
+def Hexagon_A2_subh_l16_ll:
+ si_ALU64_sisi_l16_ll <"sub", int_hexagon_A2_subh_l16_ll>;
+
+def Hexagon_A2_subh_l16_sat_hl:
+ si_ALU64_sisi_l16_sat_lh <"sub", int_hexagon_A2_subh_l16_sat_hl>;
+def Hexagon_A2_subh_l16_sat_ll:
+ si_ALU64_sisi_l16_sat_ll <"sub", int_hexagon_A2_subh_l16_sat_ll>;
+
+def Hexagon_A2_subh_h16_hh:
+ si_ALU64_sisi_h16_hh <"sub", int_hexagon_A2_subh_h16_hh>;
+def Hexagon_A2_subh_h16_hl:
+ si_ALU64_sisi_h16_hl <"sub", int_hexagon_A2_subh_h16_hl>;
+def Hexagon_A2_subh_h16_lh:
+ si_ALU64_sisi_h16_lh <"sub", int_hexagon_A2_subh_h16_lh>;
+def Hexagon_A2_subh_h16_ll:
+ si_ALU64_sisi_h16_ll <"sub", int_hexagon_A2_subh_h16_ll>;
+
+def Hexagon_A2_subh_h16_sat_hh:
+ si_ALU64_sisi_h16_sat_hh <"sub", int_hexagon_A2_subh_h16_sat_hh>;
+def Hexagon_A2_subh_h16_sat_hl:
+ si_ALU64_sisi_h16_sat_hl <"sub", int_hexagon_A2_subh_h16_sat_hl>;
+def Hexagon_A2_subh_h16_sat_lh:
+ si_ALU64_sisi_h16_sat_lh <"sub", int_hexagon_A2_subh_h16_sat_lh>;
+def Hexagon_A2_subh_h16_sat_ll:
+ si_ALU64_sisi_h16_sat_ll <"sub", int_hexagon_A2_subh_h16_sat_ll>;
+
+// ALU64 / ALU / Transfer register.
+def Hexagon_A2_tfrp:
+ di_ALU64_di <"", int_hexagon_A2_tfrp>;
+
+/********************************************************************
+* ALU64/BIT *
+*********************************************************************/
+
+// ALU64 / BIT / Masked parity.
+def Hexagon_S2_parityp:
+ si_ALU64_didi <"parity", int_hexagon_S2_parityp>;
+
+/********************************************************************
+* ALU64/PERM *
+*********************************************************************/
+
+// ALU64 / PERM / Vector pack high and low halfwords.
+def Hexagon_S2_packhl:
+ di_ALU64_sisi <"packhl", int_hexagon_S2_packhl>;
+
+/********************************************************************
+* ALU64/VB *
+*********************************************************************/
+
+// ALU64 / VB / Vector add unsigned bytes.
+def Hexagon_A2_vaddub:
+ di_ALU64_didi <"vaddub", int_hexagon_A2_vaddub>;
+def Hexagon_A2_vaddubs:
+ di_ALU64_didi_sat <"vaddub", int_hexagon_A2_vaddubs>;
+
+// ALU64 / VB / Vector average unsigned bytes.
+def Hexagon_A2_vavgub:
+ di_ALU64_didi <"vavgub", int_hexagon_A2_vavgub>;
+def Hexagon_A2_vavgubr:
+ di_ALU64_didi_rnd <"vavgub", int_hexagon_A2_vavgubr>;
+
+// ALU64 / VB / Vector compare unsigned bytes.
+def Hexagon_A2_vcmpbeq:
+ qi_ALU64_didi <"vcmpb.eq", int_hexagon_A2_vcmpbeq>;
+def Hexagon_A2_vcmpbgtu:
+ qi_ALU64_didi <"vcmpb.gtu",int_hexagon_A2_vcmpbgtu>;
+
+// ALU64 / VB / Vector maximum/minimum unsigned bytes.
+def Hexagon_A2_vmaxub:
+ di_ALU64_didi <"vmaxub", int_hexagon_A2_vmaxub>;
+def Hexagon_A2_vminub:
+ di_ALU64_didi <"vminub", int_hexagon_A2_vminub>;
+
+// ALU64 / VB / Vector subtract unsigned bytes.
+def Hexagon_A2_vsubub:
+ di_ALU64_didi <"vsubub", int_hexagon_A2_vsubub>;
+def Hexagon_A2_vsububs:
+ di_ALU64_didi_sat <"vsubub", int_hexagon_A2_vsububs>;
+
+// ALU64 / VB / Vector mux.
+def Hexagon_C2_vmux:
+ di_ALU64_qididi <"vmux", int_hexagon_C2_vmux>;
+
+
+/********************************************************************
+* ALU64/VH *
+*********************************************************************/
+
+// ALU64 / VH / Vector add halfwords.
+// Rdd64=vadd[u]h(Rss64,Rtt64)[:sat]
+def Hexagon_A2_vaddh:
+ di_ALU64_didi <"vaddh", int_hexagon_A2_vaddh>;
+def Hexagon_A2_vaddhs:
+ di_ALU64_didi_sat <"vaddh", int_hexagon_A2_vaddhs>;
+def Hexagon_A2_vadduhs:
+ di_ALU64_didi_sat <"vadduh", int_hexagon_A2_vadduhs>;
+
+// ALU64 / VH / Vector average halfwords.
+// Rdd64=v[n]avg[u]h(Rss64,Rtt64)[:rnd|:crnd][:sat]
+def Hexagon_A2_vavgh:
+ di_ALU64_didi <"vavgh", int_hexagon_A2_vavgh>;
+def Hexagon_A2_vavghcr:
+ di_ALU64_didi_crnd <"vavgh", int_hexagon_A2_vavghcr>;
+def Hexagon_A2_vavghr:
+ di_ALU64_didi_rnd <"vavgh", int_hexagon_A2_vavghr>;
+def Hexagon_A2_vavguh:
+ di_ALU64_didi <"vavguh", int_hexagon_A2_vavguh>;
+def Hexagon_A2_vavguhr:
+ di_ALU64_didi_rnd <"vavguh", int_hexagon_A2_vavguhr>;
+def Hexagon_A2_vnavgh:
+ di_ALU64_didi <"vnavgh", int_hexagon_A2_vnavgh>;
+def Hexagon_A2_vnavghcr:
+ di_ALU64_didi_crnd_sat <"vnavgh", int_hexagon_A2_vnavghcr>;
+def Hexagon_A2_vnavghr:
+ di_ALU64_didi_rnd_sat <"vnavgh", int_hexagon_A2_vnavghr>;
+
+// ALU64 / VH / Vector compare halfwords.
+def Hexagon_A2_vcmpheq:
+ qi_ALU64_didi <"vcmph.eq", int_hexagon_A2_vcmpheq>;
+def Hexagon_A2_vcmphgt:
+ qi_ALU64_didi <"vcmph.gt", int_hexagon_A2_vcmphgt>;
+def Hexagon_A2_vcmphgtu:
+ qi_ALU64_didi <"vcmph.gtu",int_hexagon_A2_vcmphgtu>;
+
+// ALU64 / VH / Vector maximum halfwords.
+def Hexagon_A2_vmaxh:
+ di_ALU64_didi <"vmaxh", int_hexagon_A2_vmaxh>;
+def Hexagon_A2_vmaxuh:
+ di_ALU64_didi <"vmaxuh", int_hexagon_A2_vmaxuh>;
+
+// ALU64 / VH / Vector minimum halfwords.
+def Hexagon_A2_vminh:
+ di_ALU64_didi <"vminh", int_hexagon_A2_vminh>;
+def Hexagon_A2_vminuh:
+ di_ALU64_didi <"vminuh", int_hexagon_A2_vminuh>;
+
+// ALU64 / VH / Vector subtract halfwords.
+def Hexagon_A2_vsubh:
+ di_ALU64_didi <"vsubh", int_hexagon_A2_vsubh>;
+def Hexagon_A2_vsubhs:
+ di_ALU64_didi_sat <"vsubh", int_hexagon_A2_vsubhs>;
+def Hexagon_A2_vsubuhs:
+ di_ALU64_didi_sat <"vsubuh", int_hexagon_A2_vsubuhs>;
+
+
+/********************************************************************
+* ALU64/VW *
+*********************************************************************/
+
+// ALU64 / VW / Vector add words.
+// Rdd32=vaddw(Rss32,Rtt32)[:sat]
+def Hexagon_A2_vaddw:
+ di_ALU64_didi <"vaddw", int_hexagon_A2_vaddw>;
+def Hexagon_A2_vaddws:
+ di_ALU64_didi_sat <"vaddw", int_hexagon_A2_vaddws>;
+
+// ALU64 / VW / Vector average words.
+def Hexagon_A2_vavguw:
+ di_ALU64_didi <"vavguw", int_hexagon_A2_vavguw>;
+def Hexagon_A2_vavguwr:
+ di_ALU64_didi_rnd <"vavguw", int_hexagon_A2_vavguwr>;
+def Hexagon_A2_vavgw:
+ di_ALU64_didi <"vavgw", int_hexagon_A2_vavgw>;
+def Hexagon_A2_vavgwcr:
+ di_ALU64_didi_crnd <"vavgw", int_hexagon_A2_vavgwcr>;
+def Hexagon_A2_vavgwr:
+ di_ALU64_didi_rnd <"vavgw", int_hexagon_A2_vavgwr>;
+def Hexagon_A2_vnavgw:
+ di_ALU64_didi <"vnavgw", int_hexagon_A2_vnavgw>;
+def Hexagon_A2_vnavgwcr:
+ di_ALU64_didi_crnd_sat <"vnavgw", int_hexagon_A2_vnavgwcr>;
+def Hexagon_A2_vnavgwr:
+ di_ALU64_didi_rnd_sat <"vnavgw", int_hexagon_A2_vnavgwr>;
+
+// ALU64 / VW / Vector compare words.
+def Hexagon_A2_vcmpweq:
+ qi_ALU64_didi <"vcmpw.eq", int_hexagon_A2_vcmpweq>;
+def Hexagon_A2_vcmpwgt:
+ qi_ALU64_didi <"vcmpw.gt", int_hexagon_A2_vcmpwgt>;
+def Hexagon_A2_vcmpwgtu:
+ qi_ALU64_didi <"vcmpw.gtu",int_hexagon_A2_vcmpwgtu>;
+
+// ALU64 / VW / Vector maximum words.
+def Hexagon_A2_vmaxw:
+ di_ALU64_didi <"vmaxw", int_hexagon_A2_vmaxw>;
+def Hexagon_A2_vmaxuw:
+ di_ALU64_didi <"vmaxuw", int_hexagon_A2_vmaxuw>;
+
+// ALU64 / VW / Vector minimum words.
+def Hexagon_A2_vminw:
+ di_ALU64_didi <"vminw", int_hexagon_A2_vminw>;
+def Hexagon_A2_vminuw:
+ di_ALU64_didi <"vminuw", int_hexagon_A2_vminuw>;
+
+// ALU64 / VW / Vector subtract words.
+def Hexagon_A2_vsubw:
+ di_ALU64_didi <"vsubw", int_hexagon_A2_vsubw>;
+def Hexagon_A2_vsubws:
+ di_ALU64_didi_sat <"vsubw", int_hexagon_A2_vsubws>;
+
+
+/********************************************************************
+* CR *
+*********************************************************************/
+
+// CR / Logical reductions on predicates.
+def Hexagon_C2_all8:
+ qi_SInst_qi <"all8", int_hexagon_C2_all8>;
+def Hexagon_C2_any8:
+ qi_SInst_qi <"any8", int_hexagon_C2_any8>;
+
+// CR / Logical operations on predicates.
+def Hexagon_C2_pxfer_map:
+ qi_SInst_qi_pxfer <"", int_hexagon_C2_pxfer_map>;
+def Hexagon_C2_and:
+ qi_SInst_qiqi <"and", int_hexagon_C2_and>;
+def Hexagon_C2_andn:
+ qi_SInst_qiqi_neg <"and", int_hexagon_C2_andn>;
+def Hexagon_C2_not:
+ qi_SInst_qi <"not", int_hexagon_C2_not>;
+def Hexagon_C2_or:
+ qi_SInst_qiqi <"or", int_hexagon_C2_or>;
+def Hexagon_C2_orn:
+ qi_SInst_qiqi_neg <"or", int_hexagon_C2_orn>;
+def Hexagon_C2_xor:
+ qi_SInst_qiqi <"xor", int_hexagon_C2_xor>;
+
+
+/********************************************************************
+* MTYPE/ALU *
+*********************************************************************/
+
+// MTYPE / ALU / Add and accumulate.
+def Hexagon_M2_acci:
+ si_MInst_sisisi_acc <"add", int_hexagon_M2_acci>;
+def Hexagon_M2_accii:
+ si_MInst_sisis8_acc <"add", int_hexagon_M2_accii>;
+def Hexagon_M2_nacci:
+ si_MInst_sisisi_nac <"add", int_hexagon_M2_nacci>;
+def Hexagon_M2_naccii:
+ si_MInst_sisis8_nac <"add", int_hexagon_M2_naccii>;
+
+// MTYPE / ALU / Subtract and accumulate.
+def Hexagon_M2_subacc:
+ si_MInst_sisisi_acc <"sub", int_hexagon_M2_subacc>;
+
+// MTYPE / ALU / Vector absolute difference.
+def Hexagon_M2_vabsdiffh:
+ di_MInst_didi <"vabsdiffh",int_hexagon_M2_vabsdiffh>;
+def Hexagon_M2_vabsdiffw:
+ di_MInst_didi <"vabsdiffw",int_hexagon_M2_vabsdiffw>;
+
+// MTYPE / ALU / XOR and xor with destination.
+def Hexagon_M2_xor_xacc:
+ si_MInst_sisisi_xacc <"xor", int_hexagon_M2_xor_xacc>;
+
+
+/********************************************************************
+* MTYPE/COMPLEX *
+*********************************************************************/
+
+// MTYPE / COMPLEX / Complex multiply.
+// Rdd[-+]=cmpy(Rs, Rt)[:<<1]:sat
+def Hexagon_M2_cmpys_s1:
+ di_MInst_sisi_s1_sat <"cmpy", int_hexagon_M2_cmpys_s1>;
+def Hexagon_M2_cmpys_s0:
+ di_MInst_sisi_sat <"cmpy", int_hexagon_M2_cmpys_s0>;
+def Hexagon_M2_cmpysc_s1:
+ di_MInst_sisi_s1_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s1>;
+def Hexagon_M2_cmpysc_s0:
+ di_MInst_sisi_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s0>;
+
+def Hexagon_M2_cmacs_s1:
+ di_MInst_disisi_acc_s1_sat <"cmpy", int_hexagon_M2_cmacs_s1>;
+def Hexagon_M2_cmacs_s0:
+ di_MInst_disisi_acc_sat <"cmpy", int_hexagon_M2_cmacs_s0>;
+def Hexagon_M2_cmacsc_s1:
+ di_MInst_disisi_acc_s1_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s1>;
+def Hexagon_M2_cmacsc_s0:
+ di_MInst_disisi_acc_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s0>;
+
+def Hexagon_M2_cnacs_s1:
+ di_MInst_disisi_nac_s1_sat <"cmpy", int_hexagon_M2_cnacs_s1>;
+def Hexagon_M2_cnacs_s0:
+ di_MInst_disisi_nac_sat <"cmpy", int_hexagon_M2_cnacs_s0>;
+def Hexagon_M2_cnacsc_s1:
+ di_MInst_disisi_nac_s1_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s1>;
+def Hexagon_M2_cnacsc_s0:
+ di_MInst_disisi_nac_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s0>;
+
+// MTYPE / COMPLEX / Complex multiply real or imaginary.
+def Hexagon_M2_cmpyr_s0:
+ di_MInst_sisi <"cmpyr", int_hexagon_M2_cmpyr_s0>;
+def Hexagon_M2_cmacr_s0:
+ di_MInst_disisi_acc <"cmpyr", int_hexagon_M2_cmacr_s0>;
+
+def Hexagon_M2_cmpyi_s0:
+ di_MInst_sisi <"cmpyi", int_hexagon_M2_cmpyi_s0>;
+def Hexagon_M2_cmaci_s0:
+ di_MInst_disisi_acc <"cmpyi", int_hexagon_M2_cmaci_s0>;
+
+// MTYPE / COMPLEX / Complex multiply with round and pack.
+// Rd32=cmpy(Rs32,[*]Rt32)[:<<1]:rnd:sat
+def Hexagon_M2_cmpyrs_s0:
+ si_MInst_sisi_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s0>;
+def Hexagon_M2_cmpyrs_s1:
+ si_MInst_sisi_s1_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s1>;
+
+def Hexagon_M2_cmpyrsc_s0:
+ si_MInst_sisi_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s0>;
+def Hexagon_M2_cmpyrsc_s1:
+ si_MInst_sisi_s1_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s1>;
+
+// MTYPE / COMPLEX / Vector complex multiply real or imaginary.
+def Hexagon_M2_vcmpy_s0_sat_i:
+ di_MInst_didi_sat <"vcmpyi", int_hexagon_M2_vcmpy_s0_sat_i>;
+def Hexagon_M2_vcmpy_s1_sat_i:
+ di_MInst_didi_s1_sat <"vcmpyi", int_hexagon_M2_vcmpy_s1_sat_i>;
+
+def Hexagon_M2_vcmpy_s0_sat_r:
+ di_MInst_didi_sat <"vcmpyr", int_hexagon_M2_vcmpy_s0_sat_r>;
+def Hexagon_M2_vcmpy_s1_sat_r:
+ di_MInst_didi_s1_sat <"vcmpyr", int_hexagon_M2_vcmpy_s1_sat_r>;
+
+def Hexagon_M2_vcmac_s0_sat_i:
+ di_MInst_dididi_acc_sat <"vcmpyi", int_hexagon_M2_vcmac_s0_sat_i>;
+def Hexagon_M2_vcmac_s0_sat_r:
+ di_MInst_dididi_acc_sat <"vcmpyr", int_hexagon_M2_vcmac_s0_sat_r>;
+
+// MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary.
+def Hexagon_M2_vrcmpyi_s0:
+ di_MInst_didi <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0>;
+def Hexagon_M2_vrcmpyr_s0:
+ di_MInst_didi <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0>;
+
+def Hexagon_M2_vrcmpyi_s0c:
+ di_MInst_didi_conj <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0c>;
+def Hexagon_M2_vrcmpyr_s0c:
+ di_MInst_didi_conj <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0c>;
+
+def Hexagon_M2_vrcmaci_s0:
+ di_MInst_dididi_acc <"vrcmpyi", int_hexagon_M2_vrcmaci_s0>;
+def Hexagon_M2_vrcmacr_s0:
+ di_MInst_dididi_acc <"vrcmpyr", int_hexagon_M2_vrcmacr_s0>;
+
+def Hexagon_M2_vrcmaci_s0c:
+ di_MInst_dididi_acc_conj <"vrcmpyi", int_hexagon_M2_vrcmaci_s0c>;
+def Hexagon_M2_vrcmacr_s0c:
+ di_MInst_dididi_acc_conj <"vrcmpyr", int_hexagon_M2_vrcmacr_s0c>;
+
+
+/********************************************************************
+* MTYPE/MPYH *
+*********************************************************************/
+
+// MTYPE / MPYH / Multiply and use lower result.
+//def Hexagon_M2_mpysmi:
+// si_MInst_sim9 <"mpyi", int_hexagon_M2_mpysmi>;
+def Hexagon_M2_mpyi:
+ si_MInst_sisi <"mpyi", int_hexagon_M2_mpyi>;
+def Hexagon_M2_mpyui:
+ si_MInst_sisi <"mpyui", int_hexagon_M2_mpyui>;
+def Hexagon_M2_macsip:
+ si_MInst_sisiu8_acc <"mpyi", int_hexagon_M2_macsip>;
+def Hexagon_M2_maci:
+ si_MInst_sisisi_acc <"mpyi", int_hexagon_M2_maci>;
+def Hexagon_M2_macsin:
+ si_MInst_sisiu8_nac <"mpyi", int_hexagon_M2_macsin>;
+
+// MTYPE / MPYH / Multiply word by half (32x16).
+//Rdd[+]=vmpywoh(Rss,Rtt)[:<<1][:rnd][:sat]
+//Rdd[+]=vmpyweh(Rss,Rtt)[:<<1][:rnd][:sat]
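+// In the def names below, the _rs1/_rs0 suffixes select the :<<1:rnd:sat and
+// :rnd:sat forms and _s1/_s0 the :<<1:sat and :sat forms, matching the
+// di_MInst_didi_* and di_MInst_dididi_acc_* classes they instantiate.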
+def Hexagon_M2_mmpyl_rs1:
+ di_MInst_didi_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs1>;
+def Hexagon_M2_mmpyl_s1:
+ di_MInst_didi_s1_sat <"vmpyweh", int_hexagon_M2_mmpyl_s1>;
+def Hexagon_M2_mmpyl_rs0:
+ di_MInst_didi_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs0>;
+def Hexagon_M2_mmpyl_s0:
+ di_MInst_didi_sat <"vmpyweh", int_hexagon_M2_mmpyl_s0>;
+def Hexagon_M2_mmpyh_rs1:
+ di_MInst_didi_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs1>;
+def Hexagon_M2_mmpyh_s1:
+ di_MInst_didi_s1_sat <"vmpywoh", int_hexagon_M2_mmpyh_s1>;
+def Hexagon_M2_mmpyh_rs0:
+ di_MInst_didi_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs0>;
+def Hexagon_M2_mmpyh_s0:
+ di_MInst_didi_sat <"vmpywoh", int_hexagon_M2_mmpyh_s0>;
+def Hexagon_M2_mmacls_rs1:
+ di_MInst_dididi_acc_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs1>;
+def Hexagon_M2_mmacls_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpyweh", int_hexagon_M2_mmacls_s1>;
+def Hexagon_M2_mmacls_rs0:
+ di_MInst_dididi_acc_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs0>;
+def Hexagon_M2_mmacls_s0:
+ di_MInst_dididi_acc_sat <"vmpyweh", int_hexagon_M2_mmacls_s0>;
+def Hexagon_M2_mmachs_rs1:
+ di_MInst_dididi_acc_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs1>;
+def Hexagon_M2_mmachs_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpywoh", int_hexagon_M2_mmachs_s1>;
+def Hexagon_M2_mmachs_rs0:
+ di_MInst_dididi_acc_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs0>;
+def Hexagon_M2_mmachs_s0:
+ di_MInst_dididi_acc_sat <"vmpywoh", int_hexagon_M2_mmachs_s0>;
+
+// MTYPE / MPYH / Multiply word by unsigned half (32x16).
+//Rdd[+]=vmpywouh(Rss,Rtt)[:<<1][:rnd][:sat]
+//Rdd[+]=vmpyweuh(Rss,Rtt)[:<<1][:rnd][:sat]
+def Hexagon_M2_mmpyul_rs1:
+ di_MInst_didi_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs1>;
+def Hexagon_M2_mmpyul_s1:
+ di_MInst_didi_s1_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s1>;
+def Hexagon_M2_mmpyul_rs0:
+ di_MInst_didi_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs0>;
+def Hexagon_M2_mmpyul_s0:
+ di_MInst_didi_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s0>;
+def Hexagon_M2_mmpyuh_rs1:
+ di_MInst_didi_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs1>;
+def Hexagon_M2_mmpyuh_s1:
+ di_MInst_didi_s1_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s1>;
+def Hexagon_M2_mmpyuh_rs0:
+ di_MInst_didi_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs0>;
+def Hexagon_M2_mmpyuh_s0:
+ di_MInst_didi_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s0>;
+def Hexagon_M2_mmaculs_rs1:
+ di_MInst_dididi_acc_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs1>;
+def Hexagon_M2_mmaculs_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s1>;
+def Hexagon_M2_mmaculs_rs0:
+ di_MInst_dididi_acc_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs0>;
+def Hexagon_M2_mmaculs_s0:
+ di_MInst_dididi_acc_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s0>;
+def Hexagon_M2_mmacuhs_rs1:
+ di_MInst_dididi_acc_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs1>;
+def Hexagon_M2_mmacuhs_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s1>;
+def Hexagon_M2_mmacuhs_rs0:
+ di_MInst_dididi_acc_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs0>;
+def Hexagon_M2_mmacuhs_s0:
+ di_MInst_dididi_acc_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s0>;
+
+// MTYPE / MPYH / Multiply and use upper result.
+def Hexagon_M2_hmmpyh_rs1:
+ si_MInst_sisi_h_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyh_rs1>;
+def Hexagon_M2_hmmpyl_rs1:
+ si_MInst_sisi_l_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyl_rs1>;
+def Hexagon_M2_mpy_up:
+ si_MInst_sisi <"mpy", int_hexagon_M2_mpy_up>;
+def Hexagon_M2_dpmpyss_rnd_s0:
+ si_MInst_sisi_rnd <"mpy", int_hexagon_M2_dpmpyss_rnd_s0>;
+def Hexagon_M2_mpyu_up:
+ si_MInst_sisi <"mpyu", int_hexagon_M2_mpyu_up>;
+
+// MTYPE / MPYH / Multiply and use full result.
+def Hexagon_M2_dpmpyuu_s0:
+ di_MInst_sisi <"mpyu", int_hexagon_M2_dpmpyuu_s0>;
+def Hexagon_M2_dpmpyuu_acc_s0:
+ di_MInst_disisi_acc <"mpyu", int_hexagon_M2_dpmpyuu_acc_s0>;
+def Hexagon_M2_dpmpyuu_nac_s0:
+ di_MInst_disisi_nac <"mpyu", int_hexagon_M2_dpmpyuu_nac_s0>;
+def Hexagon_M2_dpmpyss_s0:
+ di_MInst_sisi <"mpy", int_hexagon_M2_dpmpyss_s0>;
+def Hexagon_M2_dpmpyss_acc_s0:
+ di_MInst_disisi_acc <"mpy", int_hexagon_M2_dpmpyss_acc_s0>;
+def Hexagon_M2_dpmpyss_nac_s0:
+ di_MInst_disisi_nac <"mpy", int_hexagon_M2_dpmpyss_nac_s0>;
+
+
+/********************************************************************
+* MTYPE/MPYS *
+*********************************************************************/
+
+// MTYPE / MPYS / Scalar 16x16 multiply signed.
+//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1][:rnd][:sat]
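+// For example, Hexagon_M2_mpy_sat_rnd_hh_s1 below instantiates
+// si_MInst_sisi_sat_rnd_hh_s1 (defined above), which emits
+// "$dst = mpy($src1.H, $src2.H):<<1:rnd:sat".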
+def Hexagon_M2_mpy_hh_s0:
+ si_MInst_sisi_hh <"mpy", int_hexagon_M2_mpy_hh_s0>;
+def Hexagon_M2_mpy_hh_s1:
+ si_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpy_hh_s1>;
+def Hexagon_M2_mpy_rnd_hh_s1:
+ si_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_rnd_hh_s1>;
+def Hexagon_M2_mpy_sat_rnd_hh_s1:
+ si_MInst_sisi_sat_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s1>;
+def Hexagon_M2_mpy_sat_hh_s1:
+ si_MInst_sisi_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_hh_s1>;
+def Hexagon_M2_mpy_rnd_hh_s0:
+ si_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpy_rnd_hh_s0>;
+def Hexagon_M2_mpy_sat_rnd_hh_s0:
+ si_MInst_sisi_sat_rnd_hh <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s0>;
+def Hexagon_M2_mpy_sat_hh_s0:
+ si_MInst_sisi_sat_hh <"mpy", int_hexagon_M2_mpy_sat_hh_s0>;
+
+def Hexagon_M2_mpy_hl_s0:
+ si_MInst_sisi_hl <"mpy", int_hexagon_M2_mpy_hl_s0>;
+def Hexagon_M2_mpy_hl_s1:
+ si_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpy_hl_s1>;
+def Hexagon_M2_mpy_rnd_hl_s1:
+ si_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_rnd_hl_s1>;
+def Hexagon_M2_mpy_sat_rnd_hl_s1:
+ si_MInst_sisi_sat_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s1>;
+def Hexagon_M2_mpy_sat_hl_s1:
+ si_MInst_sisi_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_hl_s1>;
+def Hexagon_M2_mpy_rnd_hl_s0:
+ si_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpy_rnd_hl_s0>;
+def Hexagon_M2_mpy_sat_rnd_hl_s0:
+ si_MInst_sisi_sat_rnd_hl <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s0>;
+def Hexagon_M2_mpy_sat_hl_s0:
+ si_MInst_sisi_sat_hl <"mpy", int_hexagon_M2_mpy_sat_hl_s0>;
+
+def Hexagon_M2_mpy_lh_s0:
+ si_MInst_sisi_lh <"mpy", int_hexagon_M2_mpy_lh_s0>;
+def Hexagon_M2_mpy_lh_s1:
+ si_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpy_lh_s1>;
+def Hexagon_M2_mpy_rnd_lh_s1:
+ si_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_rnd_lh_s1>;
+def Hexagon_M2_mpy_sat_rnd_lh_s1:
+ si_MInst_sisi_sat_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s1>;
+def Hexagon_M2_mpy_sat_lh_s1:
+ si_MInst_sisi_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_lh_s1>;
+def Hexagon_M2_mpy_rnd_lh_s0:
+ si_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpy_rnd_lh_s0>;
+def Hexagon_M2_mpy_sat_rnd_lh_s0:
+ si_MInst_sisi_sat_rnd_lh <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s0>;
+def Hexagon_M2_mpy_sat_lh_s0:
+ si_MInst_sisi_sat_lh <"mpy", int_hexagon_M2_mpy_sat_lh_s0>;
+
+def Hexagon_M2_mpy_ll_s0:
+ si_MInst_sisi_ll <"mpy", int_hexagon_M2_mpy_ll_s0>;
+def Hexagon_M2_mpy_ll_s1:
+ si_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpy_ll_s1>;
+def Hexagon_M2_mpy_rnd_ll_s1:
+ si_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_rnd_ll_s1>;
+def Hexagon_M2_mpy_sat_rnd_ll_s1:
+ si_MInst_sisi_sat_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s1>;
+def Hexagon_M2_mpy_sat_ll_s1:
+ si_MInst_sisi_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_ll_s1>;
+def Hexagon_M2_mpy_rnd_ll_s0:
+ si_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpy_rnd_ll_s0>;
+def Hexagon_M2_mpy_sat_rnd_ll_s0:
+ si_MInst_sisi_sat_rnd_ll <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s0>;
+def Hexagon_M2_mpy_sat_ll_s0:
+ si_MInst_sisi_sat_ll <"mpy", int_hexagon_M2_mpy_sat_ll_s0>;
+
+//Rdd=mpy(Rs.[H|L],Rt.[H|L])[[:<<0|:<<1]|[:<<0:rnd|:<<1:rnd]]
+def Hexagon_M2_mpyd_hh_s0:
+ di_MInst_sisi_hh <"mpy", int_hexagon_M2_mpyd_hh_s0>;
+def Hexagon_M2_mpyd_hh_s1:
+ di_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpyd_hh_s1>;
+def Hexagon_M2_mpyd_rnd_hh_s1:
+ di_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hh_s1>;
+def Hexagon_M2_mpyd_rnd_hh_s0:
+ di_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpyd_rnd_hh_s0>;
+
+def Hexagon_M2_mpyd_hl_s0:
+ di_MInst_sisi_hl <"mpy", int_hexagon_M2_mpyd_hl_s0>;
+def Hexagon_M2_mpyd_hl_s1:
+ di_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpyd_hl_s1>;
+def Hexagon_M2_mpyd_rnd_hl_s1:
+ di_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hl_s1>;
+def Hexagon_M2_mpyd_rnd_hl_s0:
+ di_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpyd_rnd_hl_s0>;
+
+def Hexagon_M2_mpyd_lh_s0:
+ di_MInst_sisi_lh <"mpy", int_hexagon_M2_mpyd_lh_s0>;
+def Hexagon_M2_mpyd_lh_s1:
+ di_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpyd_lh_s1>;
+def Hexagon_M2_mpyd_rnd_lh_s1:
+ di_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_lh_s1>;
+def Hexagon_M2_mpyd_rnd_lh_s0:
+ di_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpyd_rnd_lh_s0>;
+
+def Hexagon_M2_mpyd_ll_s0:
+ di_MInst_sisi_ll <"mpy", int_hexagon_M2_mpyd_ll_s0>;
+def Hexagon_M2_mpyd_ll_s1:
+ di_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpyd_ll_s1>;
+def Hexagon_M2_mpyd_rnd_ll_s1:
+ di_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpyd_rnd_ll_s1>;
+def Hexagon_M2_mpyd_rnd_ll_s0:
+ di_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpyd_rnd_ll_s0>;
+
+//Rx+=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1][:sat]
+def Hexagon_M2_mpy_acc_hh_s0:
+ si_MInst_sisisi_acc_hh <"mpy", int_hexagon_M2_mpy_acc_hh_s0>;
+def Hexagon_M2_mpy_acc_hh_s1:
+ si_MInst_sisisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_hh_s1>;
+def Hexagon_M2_mpy_acc_sat_hh_s1:
+ si_MInst_sisisi_acc_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s1>;
+def Hexagon_M2_mpy_acc_sat_hh_s0:
+ si_MInst_sisisi_acc_sat_hh <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s0>;
+
+def Hexagon_M2_mpy_acc_hl_s0:
+ si_MInst_sisisi_acc_hl <"mpy", int_hexagon_M2_mpy_acc_hl_s0>;
+def Hexagon_M2_mpy_acc_hl_s1:
+ si_MInst_sisisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_hl_s1>;
+def Hexagon_M2_mpy_acc_sat_hl_s1:
+ si_MInst_sisisi_acc_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s1>;
+def Hexagon_M2_mpy_acc_sat_hl_s0:
+ si_MInst_sisisi_acc_sat_hl <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s0>;
+
+def Hexagon_M2_mpy_acc_lh_s0:
+ si_MInst_sisisi_acc_lh <"mpy", int_hexagon_M2_mpy_acc_lh_s0>;
+def Hexagon_M2_mpy_acc_lh_s1:
+ si_MInst_sisisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_lh_s1>;
+def Hexagon_M2_mpy_acc_sat_lh_s1:
+ si_MInst_sisisi_acc_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s1>;
+def Hexagon_M2_mpy_acc_sat_lh_s0:
+ si_MInst_sisisi_acc_sat_lh <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s0>;
+
+def Hexagon_M2_mpy_acc_ll_s0:
+ si_MInst_sisisi_acc_ll <"mpy", int_hexagon_M2_mpy_acc_ll_s0>;
+def Hexagon_M2_mpy_acc_ll_s1:
+ si_MInst_sisisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_ll_s1>;
+def Hexagon_M2_mpy_acc_sat_ll_s1:
+ si_MInst_sisisi_acc_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s1>;
+def Hexagon_M2_mpy_acc_sat_ll_s0:
+ si_MInst_sisisi_acc_sat_ll <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s0>;
+
+//Rx-=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1][:sat]
+def Hexagon_M2_mpy_nac_hh_s0:
+ si_MInst_sisisi_nac_hh <"mpy", int_hexagon_M2_mpy_nac_hh_s0>;
+def Hexagon_M2_mpy_nac_hh_s1:
+ si_MInst_sisisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_hh_s1>;
+def Hexagon_M2_mpy_nac_sat_hh_s1:
+ si_MInst_sisisi_nac_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s1>;
+def Hexagon_M2_mpy_nac_sat_hh_s0:
+ si_MInst_sisisi_nac_sat_hh <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s0>;
+
+def Hexagon_M2_mpy_nac_hl_s0:
+ si_MInst_sisisi_nac_hl <"mpy", int_hexagon_M2_mpy_nac_hl_s0>;
+def Hexagon_M2_mpy_nac_hl_s1:
+ si_MInst_sisisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_hl_s1>;
+def Hexagon_M2_mpy_nac_sat_hl_s1:
+ si_MInst_sisisi_nac_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s1>;
+def Hexagon_M2_mpy_nac_sat_hl_s0:
+ si_MInst_sisisi_nac_sat_hl <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s0>;
+
+def Hexagon_M2_mpy_nac_lh_s0:
+ si_MInst_sisisi_nac_lh <"mpy", int_hexagon_M2_mpy_nac_lh_s0>;
+def Hexagon_M2_mpy_nac_lh_s1:
+ si_MInst_sisisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_lh_s1>;
+def Hexagon_M2_mpy_nac_sat_lh_s1:
+ si_MInst_sisisi_nac_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s1>;
+def Hexagon_M2_mpy_nac_sat_lh_s0:
+ si_MInst_sisisi_nac_sat_lh <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s0>;
+
+def Hexagon_M2_mpy_nac_ll_s0:
+ si_MInst_sisisi_nac_ll <"mpy", int_hexagon_M2_mpy_nac_ll_s0>;
+def Hexagon_M2_mpy_nac_ll_s1:
+ si_MInst_sisisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_ll_s1>;
+def Hexagon_M2_mpy_nac_sat_ll_s1:
+ si_MInst_sisisi_nac_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s1>;
+def Hexagon_M2_mpy_nac_sat_ll_s0:
+ si_MInst_sisisi_nac_sat_ll <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s0>;
+
+//Rxx+=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyd_acc_hh_s0:
+ di_MInst_disisi_acc_hh <"mpy", int_hexagon_M2_mpyd_acc_hh_s0>;
+def Hexagon_M2_mpyd_acc_hh_s1:
+ di_MInst_disisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpyd_acc_hh_s1>;
+
+def Hexagon_M2_mpyd_acc_hl_s0:
+ di_MInst_disisi_acc_hl <"mpy", int_hexagon_M2_mpyd_acc_hl_s0>;
+def Hexagon_M2_mpyd_acc_hl_s1:
+ di_MInst_disisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpyd_acc_hl_s1>;
+
+def Hexagon_M2_mpyd_acc_lh_s0:
+ di_MInst_disisi_acc_lh <"mpy", int_hexagon_M2_mpyd_acc_lh_s0>;
+def Hexagon_M2_mpyd_acc_lh_s1:
+ di_MInst_disisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpyd_acc_lh_s1>;
+
+def Hexagon_M2_mpyd_acc_ll_s0:
+ di_MInst_disisi_acc_ll <"mpy", int_hexagon_M2_mpyd_acc_ll_s0>;
+def Hexagon_M2_mpyd_acc_ll_s1:
+ di_MInst_disisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpyd_acc_ll_s1>;
+
+//Rxx-=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyd_nac_hh_s0:
+ di_MInst_disisi_nac_hh <"mpy", int_hexagon_M2_mpyd_nac_hh_s0>;
+def Hexagon_M2_mpyd_nac_hh_s1:
+ di_MInst_disisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpyd_nac_hh_s1>;
+
+def Hexagon_M2_mpyd_nac_hl_s0:
+ di_MInst_disisi_nac_hl <"mpy", int_hexagon_M2_mpyd_nac_hl_s0>;
+def Hexagon_M2_mpyd_nac_hl_s1:
+ di_MInst_disisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpyd_nac_hl_s1>;
+
+def Hexagon_M2_mpyd_nac_lh_s0:
+ di_MInst_disisi_nac_lh <"mpy", int_hexagon_M2_mpyd_nac_lh_s0>;
+def Hexagon_M2_mpyd_nac_lh_s1:
+ di_MInst_disisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpyd_nac_lh_s1>;
+
+def Hexagon_M2_mpyd_nac_ll_s0:
+ di_MInst_disisi_nac_ll <"mpy", int_hexagon_M2_mpyd_nac_ll_s0>;
+def Hexagon_M2_mpyd_nac_ll_s1:
+ di_MInst_disisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpyd_nac_ll_s1>;
+
+// MTYPE / MPYS / Scalar 16x16 multiply unsigned.
+//Rd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyu_hh_s0:
+ si_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyu_hh_s0>;
+def Hexagon_M2_mpyu_hh_s1:
+ si_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyu_hh_s1>;
+def Hexagon_M2_mpyu_hl_s0:
+ si_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyu_hl_s0>;
+def Hexagon_M2_mpyu_hl_s1:
+ si_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyu_hl_s1>;
+def Hexagon_M2_mpyu_lh_s0:
+ si_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyu_lh_s0>;
+def Hexagon_M2_mpyu_lh_s1:
+ si_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyu_lh_s1>;
+def Hexagon_M2_mpyu_ll_s0:
+ si_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyu_ll_s0>;
+def Hexagon_M2_mpyu_ll_s1:
+ si_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyu_ll_s1>;
+
+//Rdd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyud_hh_s0:
+ di_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyud_hh_s0>;
+def Hexagon_M2_mpyud_hh_s1:
+ di_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyud_hh_s1>;
+def Hexagon_M2_mpyud_hl_s0:
+ di_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyud_hl_s0>;
+def Hexagon_M2_mpyud_hl_s1:
+ di_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyud_hl_s1>;
+def Hexagon_M2_mpyud_lh_s0:
+ di_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyud_lh_s0>;
+def Hexagon_M2_mpyud_lh_s1:
+ di_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyud_lh_s1>;
+def Hexagon_M2_mpyud_ll_s0:
+ di_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyud_ll_s0>;
+def Hexagon_M2_mpyud_ll_s1:
+ di_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyud_ll_s1>;
+
+//Rd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyu_acc_hh_s0:
+ si_MInst_sisisi_acc_hh <"mpyu", int_hexagon_M2_mpyu_acc_hh_s0>;
+def Hexagon_M2_mpyu_acc_hh_s1:
+ si_MInst_sisisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hh_s1>;
+def Hexagon_M2_mpyu_acc_hl_s0:
+ si_MInst_sisisi_acc_hl <"mpyu", int_hexagon_M2_mpyu_acc_hl_s0>;
+def Hexagon_M2_mpyu_acc_hl_s1:
+ si_MInst_sisisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hl_s1>;
+def Hexagon_M2_mpyu_acc_lh_s0:
+ si_MInst_sisisi_acc_lh <"mpyu", int_hexagon_M2_mpyu_acc_lh_s0>;
+def Hexagon_M2_mpyu_acc_lh_s1:
+ si_MInst_sisisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_lh_s1>;
+def Hexagon_M2_mpyu_acc_ll_s0:
+ si_MInst_sisisi_acc_ll <"mpyu", int_hexagon_M2_mpyu_acc_ll_s0>;
+def Hexagon_M2_mpyu_acc_ll_s1:
+ si_MInst_sisisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyu_acc_ll_s1>;
+
+//Rd-=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyu_nac_hh_s0:
+ si_MInst_sisisi_nac_hh <"mpyu", int_hexagon_M2_mpyu_nac_hh_s0>;
+def Hexagon_M2_mpyu_nac_hh_s1:
+ si_MInst_sisisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hh_s1>;
+def Hexagon_M2_mpyu_nac_hl_s0:
+ si_MInst_sisisi_nac_hl <"mpyu", int_hexagon_M2_mpyu_nac_hl_s0>;
+def Hexagon_M2_mpyu_nac_hl_s1:
+ si_MInst_sisisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hl_s1>;
+def Hexagon_M2_mpyu_nac_lh_s0:
+ si_MInst_sisisi_nac_lh <"mpyu", int_hexagon_M2_mpyu_nac_lh_s0>;
+def Hexagon_M2_mpyu_nac_lh_s1:
+ si_MInst_sisisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_lh_s1>;
+def Hexagon_M2_mpyu_nac_ll_s0:
+ si_MInst_sisisi_nac_ll <"mpyu", int_hexagon_M2_mpyu_nac_ll_s0>;
+def Hexagon_M2_mpyu_nac_ll_s1:
+ si_MInst_sisisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyu_nac_ll_s1>;
+
+//Rdd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyud_acc_hh_s0:
+ di_MInst_disisi_acc_hh <"mpyu", int_hexagon_M2_mpyud_acc_hh_s0>;
+def Hexagon_M2_mpyud_acc_hh_s1:
+ di_MInst_disisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hh_s1>;
+def Hexagon_M2_mpyud_acc_hl_s0:
+ di_MInst_disisi_acc_hl <"mpyu", int_hexagon_M2_mpyud_acc_hl_s0>;
+def Hexagon_M2_mpyud_acc_hl_s1:
+ di_MInst_disisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hl_s1>;
+def Hexagon_M2_mpyud_acc_lh_s0:
+ di_MInst_disisi_acc_lh <"mpyu", int_hexagon_M2_mpyud_acc_lh_s0>;
+def Hexagon_M2_mpyud_acc_lh_s1:
+ di_MInst_disisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_lh_s1>;
+def Hexagon_M2_mpyud_acc_ll_s0:
+ di_MInst_disisi_acc_ll <"mpyu", int_hexagon_M2_mpyud_acc_ll_s0>;
+def Hexagon_M2_mpyud_acc_ll_s1:
+ di_MInst_disisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyud_acc_ll_s1>;
+
+//Rdd-=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyud_nac_hh_s0:
+ di_MInst_disisi_nac_hh <"mpyu", int_hexagon_M2_mpyud_nac_hh_s0>;
+def Hexagon_M2_mpyud_nac_hh_s1:
+ di_MInst_disisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hh_s1>;
+def Hexagon_M2_mpyud_nac_hl_s0:
+ di_MInst_disisi_nac_hl <"mpyu", int_hexagon_M2_mpyud_nac_hl_s0>;
+def Hexagon_M2_mpyud_nac_hl_s1:
+ di_MInst_disisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hl_s1>;
+def Hexagon_M2_mpyud_nac_lh_s0:
+ di_MInst_disisi_nac_lh <"mpyu", int_hexagon_M2_mpyud_nac_lh_s0>;
+def Hexagon_M2_mpyud_nac_lh_s1:
+ di_MInst_disisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_lh_s1>;
+def Hexagon_M2_mpyud_nac_ll_s0:
+ di_MInst_disisi_nac_ll <"mpyu", int_hexagon_M2_mpyud_nac_ll_s0>;
+def Hexagon_M2_mpyud_nac_ll_s1:
+ di_MInst_disisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyud_nac_ll_s1>;
+
+
+/********************************************************************
+* MTYPE/VB *
+*********************************************************************/
+
+// MTYPE / VB / Vector reduce add unsigned bytes.
+def Hexagon_A2_vraddub:
+ di_MInst_didi <"vraddub", int_hexagon_A2_vraddub>;
+def Hexagon_A2_vraddub_acc:
+ di_MInst_dididi_acc <"vraddub", int_hexagon_A2_vraddub_acc>;
+
+// MTYPE / VB / Vector sum of absolute differences unsigned bytes.
+def Hexagon_A2_vrsadub:
+ di_MInst_didi <"vrsadub", int_hexagon_A2_vrsadub>;
+def Hexagon_A2_vrsadub_acc:
+ di_MInst_dididi_acc <"vrsadub", int_hexagon_A2_vrsadub_acc>;
+
+/********************************************************************
+* MTYPE/VH *
+*********************************************************************/
+
+// MTYPE / VH / Vector dual multiply.
+def Hexagon_M2_vdmpys_s1:
+ di_MInst_didi_s1_sat <"vdmpy", int_hexagon_M2_vdmpys_s1>;
+def Hexagon_M2_vdmpys_s0:
+ di_MInst_didi_sat <"vdmpy", int_hexagon_M2_vdmpys_s0>;
+def Hexagon_M2_vdmacs_s1:
+ di_MInst_dididi_acc_s1_sat <"vdmpy", int_hexagon_M2_vdmacs_s1>;
+def Hexagon_M2_vdmacs_s0:
+ di_MInst_dididi_acc_sat <"vdmpy", int_hexagon_M2_vdmacs_s0>;
+
+// MTYPE / VH / Vector dual multiply with round and pack.
+def Hexagon_M2_vdmpyrs_s0:
+ si_MInst_didi_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s0>;
+def Hexagon_M2_vdmpyrs_s1:
+ si_MInst_didi_s1_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s1>;
+
+// MTYPE / VH / Vector multiply even halfwords.
+def Hexagon_M2_vmpy2es_s1:
+ di_MInst_didi_s1_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s1>;
+def Hexagon_M2_vmpy2es_s0:
+ di_MInst_didi_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s0>;
+def Hexagon_M2_vmac2es:
+ di_MInst_dididi_acc <"vmpyeh", int_hexagon_M2_vmac2es>;
+def Hexagon_M2_vmac2es_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpyeh", int_hexagon_M2_vmac2es_s1>;
+def Hexagon_M2_vmac2es_s0:
+ di_MInst_dididi_acc_sat <"vmpyeh", int_hexagon_M2_vmac2es_s0>;
+
+// MTYPE / VH / Vector multiply halfwords.
+def Hexagon_M2_vmpy2s_s0:
+ di_MInst_sisi_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0>;
+def Hexagon_M2_vmpy2s_s1:
+ di_MInst_sisi_s1_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1>;
+def Hexagon_M2_vmac2:
+ di_MInst_disisi_acc <"vmpyh", int_hexagon_M2_vmac2>;
+def Hexagon_M2_vmac2s_s0:
+ di_MInst_disisi_acc_sat <"vmpyh", int_hexagon_M2_vmac2s_s0>;
+def Hexagon_M2_vmac2s_s1:
+ di_MInst_disisi_acc_s1_sat <"vmpyh", int_hexagon_M2_vmac2s_s1>;
+
+// MTYPE / VH / Vector multiply halfwords with round and pack.
+def Hexagon_M2_vmpy2s_s0pack:
+ si_MInst_sisi_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0pack>;
+def Hexagon_M2_vmpy2s_s1pack:
+ si_MInst_sisi_s1_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1pack>;
+
+// MTYPE / VH / Vector reduce multiply halfwords.
+// Rxx32+=vrmpyh(Rss32,Rtt32)
+def Hexagon_M2_vrmpy_s0:
+ di_MInst_didi <"vrmpyh", int_hexagon_M2_vrmpy_s0>;
+def Hexagon_M2_vrmac_s0:
+ di_MInst_dididi_acc <"vrmpyh", int_hexagon_M2_vrmac_s0>;
+
+
+/********************************************************************
+* STYPE/ALU *
+*********************************************************************/
+
+// STYPE / ALU / Absolute value.
+def Hexagon_A2_abs:
+ si_SInst_si <"abs", int_hexagon_A2_abs>;
+def Hexagon_A2_absp:
+ di_SInst_di <"abs", int_hexagon_A2_absp>;
+def Hexagon_A2_abssat:
+ si_SInst_si_sat <"abs", int_hexagon_A2_abssat>;
+
+// STYPE / ALU / Negate.
+def Hexagon_A2_negp:
+ di_SInst_di <"neg", int_hexagon_A2_negp>;
+def Hexagon_A2_negsat:
+ si_SInst_si_sat <"neg", int_hexagon_A2_negsat>;
+
+// STYPE / ALU / Logical Not.
+def Hexagon_A2_notp:
+ di_SInst_di <"not", int_hexagon_A2_notp>;
+
+// STYPE / ALU / Sign extend word to doubleword.
+def Hexagon_A2_sxtw:
+ di_SInst_si <"sxtw", int_hexagon_A2_sxtw>;
+
+
+/********************************************************************
+* STYPE/BIT *
+*********************************************************************/
+
+// STYPE / BIT / Count leading.
+def Hexagon_S2_cl0:
+ si_SInst_si <"cl0", int_hexagon_S2_cl0>;
+def Hexagon_S2_cl0p:
+ si_SInst_di <"cl0", int_hexagon_S2_cl0p>;
+def Hexagon_S2_cl1:
+ si_SInst_si <"cl1", int_hexagon_S2_cl1>;
+def Hexagon_S2_cl1p:
+ si_SInst_di <"cl1", int_hexagon_S2_cl1p>;
+def Hexagon_S2_clb:
+ si_SInst_si <"clb", int_hexagon_S2_clb>;
+def Hexagon_S2_clbp:
+ si_SInst_di <"clb", int_hexagon_S2_clbp>;
+def Hexagon_S2_clbnorm:
+ si_SInst_si <"normamt", int_hexagon_S2_clbnorm>;
+
+// STYPE / BIT / Count trailing.
+def Hexagon_S2_ct0:
+ si_SInst_si <"ct0", int_hexagon_S2_ct0>;
+def Hexagon_S2_ct1:
+ si_SInst_si <"ct1", int_hexagon_S2_ct1>;
+
+// STYPE / BIT / Compare bit mask.
+def HEXAGON_C2_bitsclr:
+ qi_SInst_sisi <"bitsclr", int_hexagon_C2_bitsclr>;
+def HEXAGON_C2_bitsclri:
+ qi_SInst_siu6 <"bitsclr", int_hexagon_C2_bitsclri>;
+def HEXAGON_C2_bitsset:
+ qi_SInst_sisi <"bitsset", int_hexagon_C2_bitsset>;
+
+// STYPE / BIT / Extract unsigned.
+// Rd[d][32/64]=extractu(Rs[s],Rt[t],[imm])
+def Hexagon_S2_extractu:
+ si_SInst_siu5u5 <"extractu",int_hexagon_S2_extractu>;
+def Hexagon_S2_extractu_rp:
+ si_SInst_sidi <"extractu",int_hexagon_S2_extractu_rp>;
+def Hexagon_S2_extractup:
+ di_SInst_diu6u6 <"extractu",int_hexagon_S2_extractup>;
+def Hexagon_S2_extractup_rp:
+ di_SInst_didi <"extractu",int_hexagon_S2_extractup_rp>;
+
+// STYPE / BIT / Insert bitfield.
+def HEXAGON_S2_insert:
+ si_SInst_sisiu5u5 <"insert", int_hexagon_S2_insert>;
+def HEXAGON_S2_insert_rp:
+ si_SInst_sisidi <"insert", int_hexagon_S2_insert_rp>;
+def HEXAGON_S2_insertp:
+ di_SInst_didiu6u6 <"insert", int_hexagon_S2_insertp>;
+def HEXAGON_S2_insertp_rp:
+ di_SInst_dididi <"insert", int_hexagon_S2_insertp_rp>;
+
+// STYPE / BIT / Interleave/deinterleave.
+def HEXAGON_S2_interleave:
+ di_SInst_di <"interleave", int_hexagon_S2_interleave>;
+def HEXAGON_S2_deinterleave:
+ di_SInst_di <"deinterleave", int_hexagon_S2_deinterleave>;
+
+// STYPE / BIT / Linear feedback-shift Iteration.
+def HEXAGON_S2_lfsp:
+ di_SInst_didi <"lfs", int_hexagon_S2_lfsp>;
+
+// STYPE / BIT / Bit reverse.
+def HEXAGON_S2_brev:
+ si_SInst_si <"brev", int_hexagon_S2_brev>;
+
+// STYPE / BIT / Set/Clear/Toggle Bit.
+def Hexagon_S2_setbit_i:
+ si_SInst_siu5 <"setbit", int_hexagon_S2_setbit_i>;
+def Hexagon_S2_togglebit_i:
+ si_SInst_siu5 <"togglebit", int_hexagon_S2_togglebit_i>;
+def Hexagon_S2_clrbit_i:
+ si_SInst_siu5 <"clrbit", int_hexagon_S2_clrbit_i>;
+def Hexagon_S2_setbit_r:
+ si_SInst_sisi <"setbit", int_hexagon_S2_setbit_r>;
+def Hexagon_S2_togglebit_r:
+ si_SInst_sisi <"togglebit", int_hexagon_S2_togglebit_r>;
+def Hexagon_S2_clrbit_r:
+ si_SInst_sisi <"clrbit", int_hexagon_S2_clrbit_r>;
+
+// STYPE / BIT / Test Bit.
+def Hexagon_S2_tstbit_i:
+ qi_SInst_siu5 <"tstbit", int_hexagon_S2_tstbit_i>;
+def Hexagon_S2_tstbit_r:
+ qi_SInst_sisi <"tstbit", int_hexagon_S2_tstbit_r>;
+
+
+/********************************************************************
+* STYPE/COMPLEX *
+*********************************************************************/
+
+// STYPE / COMPLEX / Vector Complex conjugate.
+def Hexagon_A2_vconj:
+ di_SInst_di_sat <"vconj", int_hexagon_A2_vconj>;
+
+// STYPE / COMPLEX / Vector Complex rotate.
+def Hexagon_S2_vcrotate:
+ di_SInst_disi <"vcrotate",int_hexagon_S2_vcrotate>;
+
+
+/********************************************************************
+* STYPE/PERM *
+*********************************************************************/
+
+// STYPE / PERM / Saturate.
+def Hexagon_A2_sat:
+ si_SInst_di <"sat", int_hexagon_A2_sat>;
+def Hexagon_A2_satb:
+ si_SInst_si <"satb", int_hexagon_A2_satb>;
+def Hexagon_A2_sath:
+ si_SInst_si <"sath", int_hexagon_A2_sath>;
+def Hexagon_A2_satub:
+ si_SInst_si <"satub", int_hexagon_A2_satub>;
+def Hexagon_A2_satuh:
+ si_SInst_si <"satuh", int_hexagon_A2_satuh>;
+
+// STYPE / PERM / Swizzle bytes.
+def Hexagon_A2_swiz:
+ si_SInst_si <"swiz", int_hexagon_A2_swiz>;
+
+// STYPE / PERM / Vector align.
+// Need custom lowering
+def Hexagon_S2_valignib:
+ di_SInst_didiu3 <"valignb", int_hexagon_S2_valignib>;
+def Hexagon_S2_valignrb:
+ di_SInst_didiqi <"valignb", int_hexagon_S2_valignrb>;
+
+// STYPE / PERM / Vector round and pack.
+def Hexagon_S2_vrndpackwh:
+ si_SInst_di <"vrndwh", int_hexagon_S2_vrndpackwh>;
+def Hexagon_S2_vrndpackwhs:
+ si_SInst_di_sat <"vrndwh", int_hexagon_S2_vrndpackwhs>;
+
+// STYPE / PERM / Vector saturate and pack.
+def Hexagon_S2_svsathb:
+ si_SInst_si <"vsathb", int_hexagon_S2_svsathb>;
+def Hexagon_S2_vsathb:
+ si_SInst_di <"vsathb", int_hexagon_S2_vsathb>;
+def Hexagon_S2_svsathub:
+ si_SInst_si <"vsathub", int_hexagon_S2_svsathub>;
+def Hexagon_S2_vsathub:
+ si_SInst_di <"vsathub", int_hexagon_S2_vsathub>;
+def Hexagon_S2_vsatwh:
+ si_SInst_di <"vsatwh", int_hexagon_S2_vsatwh>;
+def Hexagon_S2_vsatwuh:
+ si_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh>;
+
+// STYPE / PERM / Vector saturate without pack.
+def Hexagon_S2_vsathb_nopack:
+ di_SInst_di <"vsathb", int_hexagon_S2_vsathb_nopack>;
+def Hexagon_S2_vsathub_nopack:
+ di_SInst_di <"vsathub", int_hexagon_S2_vsathub_nopack>;
+def Hexagon_S2_vsatwh_nopack:
+ di_SInst_di <"vsatwh", int_hexagon_S2_vsatwh_nopack>;
+def Hexagon_S2_vsatwuh_nopack:
+ di_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh_nopack>;
+
+// STYPE / PERM / Vector shuffle.
+def Hexagon_S2_shuffeb:
+ di_SInst_didi <"shuffeb", int_hexagon_S2_shuffeb>;
+def Hexagon_S2_shuffeh:
+ di_SInst_didi <"shuffeh", int_hexagon_S2_shuffeh>;
+def Hexagon_S2_shuffob:
+ di_SInst_didi <"shuffob", int_hexagon_S2_shuffob>;
+def Hexagon_S2_shuffoh:
+ di_SInst_didi <"shuffoh", int_hexagon_S2_shuffoh>;
+
+// STYPE / PERM / Vector splat bytes.
+def Hexagon_S2_vsplatrb:
+ si_SInst_si <"vsplatb", int_hexagon_S2_vsplatrb>;
+
+// STYPE / PERM / Vector splat halfwords.
+def Hexagon_S2_vsplatrh:
+ di_SInst_si <"vsplath", int_hexagon_S2_vsplatrh>;
+
+// STYPE / PERM / Vector splice.
+def HEXAGON_S2_vsplicerb:
+ di_SInst_didiqi <"vspliceb",int_hexagon_S2_vsplicerb>;
+def HEXAGON_S2_vspliceib:
+ di_SInst_didiu3 <"vspliceb",int_hexagon_S2_vspliceib>;
+
+// STYPE / PERM / Sign extend.
+def Hexagon_S2_vsxtbh:
+ di_SInst_si <"vsxtbh", int_hexagon_S2_vsxtbh>;
+def Hexagon_S2_vsxthw:
+ di_SInst_si <"vsxthw", int_hexagon_S2_vsxthw>;
+
+// STYPE / PERM / Truncate.
+def Hexagon_S2_vtrunehb:
+ si_SInst_di <"vtrunehb",int_hexagon_S2_vtrunehb>;
+def Hexagon_S2_vtrunohb:
+ si_SInst_di <"vtrunohb",int_hexagon_S2_vtrunohb>;
+def Hexagon_S2_vtrunewh:
+ di_SInst_didi <"vtrunewh",int_hexagon_S2_vtrunewh>;
+def Hexagon_S2_vtrunowh:
+ di_SInst_didi <"vtrunowh",int_hexagon_S2_vtrunowh>;
+
+// STYPE / PERM / Zero extend.
+def Hexagon_S2_vzxtbh:
+ di_SInst_si <"vzxtbh", int_hexagon_S2_vzxtbh>;
+def Hexagon_S2_vzxthw:
+ di_SInst_si <"vzxthw", int_hexagon_S2_vzxthw>;
+
+
+/********************************************************************
+* STYPE/PRED *
+*********************************************************************/
+
+// STYPE / PRED / Mask generate from predicate.
+def Hexagon_C2_mask:
+ di_SInst_qi <"mask", int_hexagon_C2_mask>;
+
+// STYPE / PRED / Predicate transfer.
+def Hexagon_C2_tfrpr:
+ si_SInst_qi <"", int_hexagon_C2_tfrpr>;
+def Hexagon_C2_tfrrp:
+ qi_SInst_si <"", int_hexagon_C2_tfrrp>;
+
+// STYPE / PRED / Viterbi pack even and odd predicate bits.
+def Hexagon_C2_vitpack:
+ si_SInst_qiqi <"vitpack",int_hexagon_C2_vitpack>;
+
+
+/********************************************************************
+* STYPE/SHIFT *
+*********************************************************************/
+
+// STYPE / SHIFT / Shift by immediate.
+def Hexagon_S2_asl_i_r:
+ si_SInst_siu5 <"asl", int_hexagon_S2_asl_i_r>;
+def Hexagon_S2_asr_i_r:
+ si_SInst_siu5 <"asr", int_hexagon_S2_asr_i_r>;
+def Hexagon_S2_lsr_i_r:
+ si_SInst_siu5 <"lsr", int_hexagon_S2_lsr_i_r>;
+def Hexagon_S2_asl_i_p:
+ di_SInst_diu6 <"asl", int_hexagon_S2_asl_i_p>;
+def Hexagon_S2_asr_i_p:
+ di_SInst_diu6 <"asr", int_hexagon_S2_asr_i_p>;
+def Hexagon_S2_lsr_i_p:
+ di_SInst_diu6 <"lsr", int_hexagon_S2_lsr_i_p>;
+
+// STYPE / SHIFT / Shift by immediate and accumulate.
+def Hexagon_S2_asl_i_r_acc:
+ si_SInst_sisiu5_acc <"asl", int_hexagon_S2_asl_i_r_acc>;
+def Hexagon_S2_asr_i_r_acc:
+ si_SInst_sisiu5_acc <"asr", int_hexagon_S2_asr_i_r_acc>;
+def Hexagon_S2_lsr_i_r_acc:
+ si_SInst_sisiu5_acc <"lsr", int_hexagon_S2_lsr_i_r_acc>;
+def Hexagon_S2_asl_i_r_nac:
+ si_SInst_sisiu5_nac <"asl", int_hexagon_S2_asl_i_r_nac>;
+def Hexagon_S2_asr_i_r_nac:
+ si_SInst_sisiu5_nac <"asr", int_hexagon_S2_asr_i_r_nac>;
+def Hexagon_S2_lsr_i_r_nac:
+ si_SInst_sisiu5_nac <"lsr", int_hexagon_S2_lsr_i_r_nac>;
+def Hexagon_S2_asl_i_p_acc:
+ di_SInst_didiu6_acc <"asl", int_hexagon_S2_asl_i_p_acc>;
+def Hexagon_S2_asr_i_p_acc:
+ di_SInst_didiu6_acc <"asr", int_hexagon_S2_asr_i_p_acc>;
+def Hexagon_S2_lsr_i_p_acc:
+ di_SInst_didiu6_acc <"lsr", int_hexagon_S2_lsr_i_p_acc>;
+def Hexagon_S2_asl_i_p_nac:
+ di_SInst_didiu6_nac <"asl", int_hexagon_S2_asl_i_p_nac>;
+def Hexagon_S2_asr_i_p_nac:
+ di_SInst_didiu6_nac <"asr", int_hexagon_S2_asr_i_p_nac>;
+def Hexagon_S2_lsr_i_p_nac:
+ di_SInst_didiu6_nac <"lsr", int_hexagon_S2_lsr_i_p_nac>;
+
+// STYPE / SHIFT / Shift by immediate and add.
+def Hexagon_S2_addasl_rrri:
+ si_SInst_sisiu3 <"addasl", int_hexagon_S2_addasl_rrri>;
+
+// STYPE / SHIFT / Shift by immediate and logical.
+def Hexagon_S2_asl_i_r_and:
+ si_SInst_sisiu5_and <"asl", int_hexagon_S2_asl_i_r_and>;
+def Hexagon_S2_asr_i_r_and:
+ si_SInst_sisiu5_and <"asr", int_hexagon_S2_asr_i_r_and>;
+def Hexagon_S2_lsr_i_r_and:
+ si_SInst_sisiu5_and <"lsr", int_hexagon_S2_lsr_i_r_and>;
+
+def Hexagon_S2_asl_i_r_xacc:
+ si_SInst_sisiu5_xor <"asl", int_hexagon_S2_asl_i_r_xacc>;
+def Hexagon_S2_lsr_i_r_xacc:
+ si_SInst_sisiu5_xor <"lsr", int_hexagon_S2_lsr_i_r_xacc>;
+
+def Hexagon_S2_asl_i_r_or:
+ si_SInst_sisiu5_or <"asl", int_hexagon_S2_asl_i_r_or>;
+def Hexagon_S2_asr_i_r_or:
+ si_SInst_sisiu5_or <"asr", int_hexagon_S2_asr_i_r_or>;
+def Hexagon_S2_lsr_i_r_or:
+ si_SInst_sisiu5_or <"lsr", int_hexagon_S2_lsr_i_r_or>;
+
+def Hexagon_S2_asl_i_p_and:
+ di_SInst_didiu6_and <"asl", int_hexagon_S2_asl_i_p_and>;
+def Hexagon_S2_asr_i_p_and:
+ di_SInst_didiu6_and <"asr", int_hexagon_S2_asr_i_p_and>;
+def Hexagon_S2_lsr_i_p_and:
+ di_SInst_didiu6_and <"lsr", int_hexagon_S2_lsr_i_p_and>;
+
+def Hexagon_S2_asl_i_p_xacc:
+ di_SInst_didiu6_xor <"asl", int_hexagon_S2_asl_i_p_xacc>;
+def Hexagon_S2_lsr_i_p_xacc:
+ di_SInst_didiu6_xor <"lsr", int_hexagon_S2_lsr_i_p_xacc>;
+
+def Hexagon_S2_asl_i_p_or:
+ di_SInst_didiu6_or <"asl", int_hexagon_S2_asl_i_p_or>;
+def Hexagon_S2_asr_i_p_or:
+ di_SInst_didiu6_or <"asr", int_hexagon_S2_asr_i_p_or>;
+def Hexagon_S2_lsr_i_p_or:
+ di_SInst_didiu6_or <"lsr", int_hexagon_S2_lsr_i_p_or>;
+
+// STYPE / SHIFT / Shift right by immediate with rounding.
+def Hexagon_S2_asr_i_r_rnd:
+ si_SInst_siu5_rnd <"asr", int_hexagon_S2_asr_i_r_rnd>;
+def Hexagon_S2_asr_i_r_rnd_goodsyntax:
+ si_SInst_siu5 <"asrrnd", int_hexagon_S2_asr_i_r_rnd_goodsyntax>;
+
+// STYPE / SHIFT / Shift left by immediate with saturation.
+def Hexagon_S2_asl_i_r_sat:
+ si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_i_r_sat>;
+
+// STYPE / SHIFT / Shift by register.
+def Hexagon_S2_asl_r_r:
+ si_SInst_sisi <"asl", int_hexagon_S2_asl_r_r>;
+def Hexagon_S2_asr_r_r:
+ si_SInst_sisi <"asr", int_hexagon_S2_asr_r_r>;
+def Hexagon_S2_lsl_r_r:
+ si_SInst_sisi <"lsl", int_hexagon_S2_lsl_r_r>;
+def Hexagon_S2_lsr_r_r:
+ si_SInst_sisi <"lsr", int_hexagon_S2_lsr_r_r>;
+def Hexagon_S2_asl_r_p:
+ di_SInst_disi <"asl", int_hexagon_S2_asl_r_p>;
+def Hexagon_S2_asr_r_p:
+ di_SInst_disi <"asr", int_hexagon_S2_asr_r_p>;
+def Hexagon_S2_lsl_r_p:
+ di_SInst_disi <"lsl", int_hexagon_S2_lsl_r_p>;
+def Hexagon_S2_lsr_r_p:
+ di_SInst_disi <"lsr", int_hexagon_S2_lsr_r_p>;
+
+// STYPE / SHIFT / Shift by register and accumulate.
+def Hexagon_S2_asl_r_r_acc:
+ si_SInst_sisisi_acc <"asl", int_hexagon_S2_asl_r_r_acc>;
+def Hexagon_S2_asr_r_r_acc:
+ si_SInst_sisisi_acc <"asr", int_hexagon_S2_asr_r_r_acc>;
+def Hexagon_S2_lsl_r_r_acc:
+ si_SInst_sisisi_acc <"lsl", int_hexagon_S2_lsl_r_r_acc>;
+def Hexagon_S2_lsr_r_r_acc:
+ si_SInst_sisisi_acc <"lsr", int_hexagon_S2_lsr_r_r_acc>;
+def Hexagon_S2_asl_r_p_acc:
+ di_SInst_didisi_acc <"asl", int_hexagon_S2_asl_r_p_acc>;
+def Hexagon_S2_asr_r_p_acc:
+ di_SInst_didisi_acc <"asr", int_hexagon_S2_asr_r_p_acc>;
+def Hexagon_S2_lsl_r_p_acc:
+ di_SInst_didisi_acc <"lsl", int_hexagon_S2_lsl_r_p_acc>;
+def Hexagon_S2_lsr_r_p_acc:
+ di_SInst_didisi_acc <"lsr", int_hexagon_S2_lsr_r_p_acc>;
+
+def Hexagon_S2_asl_r_r_nac:
+ si_SInst_sisisi_nac <"asl", int_hexagon_S2_asl_r_r_nac>;
+def Hexagon_S2_asr_r_r_nac:
+ si_SInst_sisisi_nac <"asr", int_hexagon_S2_asr_r_r_nac>;
+def Hexagon_S2_lsl_r_r_nac:
+ si_SInst_sisisi_nac <"lsl", int_hexagon_S2_lsl_r_r_nac>;
+def Hexagon_S2_lsr_r_r_nac:
+ si_SInst_sisisi_nac <"lsr", int_hexagon_S2_lsr_r_r_nac>;
+def Hexagon_S2_asl_r_p_nac:
+ di_SInst_didisi_nac <"asl", int_hexagon_S2_asl_r_p_nac>;
+def Hexagon_S2_asr_r_p_nac:
+ di_SInst_didisi_nac <"asr", int_hexagon_S2_asr_r_p_nac>;
+def Hexagon_S2_lsl_r_p_nac:
+ di_SInst_didisi_nac <"lsl", int_hexagon_S2_lsl_r_p_nac>;
+def Hexagon_S2_lsr_r_p_nac:
+ di_SInst_didisi_nac <"lsr", int_hexagon_S2_lsr_r_p_nac>;
+
+// STYPE / SHIFT / Shift by register and logical.
+def Hexagon_S2_asl_r_r_and:
+ si_SInst_sisisi_and <"asl", int_hexagon_S2_asl_r_r_and>;
+def Hexagon_S2_asr_r_r_and:
+ si_SInst_sisisi_and <"asr", int_hexagon_S2_asr_r_r_and>;
+def Hexagon_S2_lsl_r_r_and:
+ si_SInst_sisisi_and <"lsl", int_hexagon_S2_lsl_r_r_and>;
+def Hexagon_S2_lsr_r_r_and:
+ si_SInst_sisisi_and <"lsr", int_hexagon_S2_lsr_r_r_and>;
+
+def Hexagon_S2_asl_r_r_or:
+ si_SInst_sisisi_or <"asl", int_hexagon_S2_asl_r_r_or>;
+def Hexagon_S2_asr_r_r_or:
+ si_SInst_sisisi_or <"asr", int_hexagon_S2_asr_r_r_or>;
+def Hexagon_S2_lsl_r_r_or:
+ si_SInst_sisisi_or <"lsl", int_hexagon_S2_lsl_r_r_or>;
+def Hexagon_S2_lsr_r_r_or:
+ si_SInst_sisisi_or <"lsr", int_hexagon_S2_lsr_r_r_or>;
+
+def Hexagon_S2_asl_r_p_and:
+ di_SInst_didisi_and <"asl", int_hexagon_S2_asl_r_p_and>;
+def Hexagon_S2_asr_r_p_and:
+ di_SInst_didisi_and <"asr", int_hexagon_S2_asr_r_p_and>;
+def Hexagon_S2_lsl_r_p_and:
+ di_SInst_didisi_and <"lsl", int_hexagon_S2_lsl_r_p_and>;
+def Hexagon_S2_lsr_r_p_and:
+ di_SInst_didisi_and <"lsr", int_hexagon_S2_lsr_r_p_and>;
+
+def Hexagon_S2_asl_r_p_or:
+ di_SInst_didisi_or <"asl", int_hexagon_S2_asl_r_p_or>;
+def Hexagon_S2_asr_r_p_or:
+ di_SInst_didisi_or <"asr", int_hexagon_S2_asr_r_p_or>;
+def Hexagon_S2_lsl_r_p_or:
+ di_SInst_didisi_or <"lsl", int_hexagon_S2_lsl_r_p_or>;
+def Hexagon_S2_lsr_r_p_or:
+ di_SInst_didisi_or <"lsr", int_hexagon_S2_lsr_r_p_or>;
+
+// STYPE / SHIFT / Shift by register with saturation.
+def Hexagon_S2_asl_r_r_sat:
+ si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_r_r_sat>;
+def Hexagon_S2_asr_r_r_sat:
+ si_SInst_sisi_sat <"asr", int_hexagon_S2_asr_r_r_sat>;
+
+// STYPE / SHIFT / Table Index.
+def HEXAGON_S2_tableidxb_goodsyntax:
+ si_MInst_sisiu4u5 <"tableidxb",int_hexagon_S2_tableidxb_goodsyntax>;
+def HEXAGON_S2_tableidxd_goodsyntax:
+ si_MInst_sisiu4u5 <"tableidxd",int_hexagon_S2_tableidxd_goodsyntax>;
+def HEXAGON_S2_tableidxh_goodsyntax:
+ si_MInst_sisiu4u5 <"tableidxh",int_hexagon_S2_tableidxh_goodsyntax>;
+def HEXAGON_S2_tableidxw_goodsyntax:
+ si_MInst_sisiu4u5 <"tableidxw",int_hexagon_S2_tableidxw_goodsyntax>;
+
+
+/********************************************************************
+* STYPE/VH *
+*********************************************************************/
+
+// STYPE / VH / Vector absolute value halfwords.
+// Rdd64=vabsh(Rss64)
+def Hexagon_A2_vabsh:
+ di_SInst_di <"vabsh", int_hexagon_A2_vabsh>;
+def Hexagon_A2_vabshsat:
+ di_SInst_di_sat <"vabsh", int_hexagon_A2_vabshsat>;
+
+// STYPE / VH / Vector shift halfwords by immediate.
+// Rdd64=v[asl/asr/lsr]h(Rss64,Rt32)
+def Hexagon_S2_asl_i_vh:
+ di_SInst_disi <"vaslh", int_hexagon_S2_asl_i_vh>;
+def Hexagon_S2_asr_i_vh:
+ di_SInst_disi <"vasrh", int_hexagon_S2_asr_i_vh>;
+def Hexagon_S2_lsr_i_vh:
+ di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_i_vh>;
+
+// STYPE / VH / Vector shift halfwords by register.
+// Rdd64=v[asl/asr/lsl/lsr]h(Rss64,Rt32)
+def Hexagon_S2_asl_r_vh:
+ di_SInst_disi <"vaslh", int_hexagon_S2_asl_r_vh>;
+def Hexagon_S2_asr_r_vh:
+ di_SInst_disi <"vasrh", int_hexagon_S2_asr_r_vh>;
+def Hexagon_S2_lsl_r_vh:
+ di_SInst_disi <"vlslh", int_hexagon_S2_lsl_r_vh>;
+def Hexagon_S2_lsr_r_vh:
+ di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_r_vh>;
+
+
+/********************************************************************
+* STYPE/VW *
+*********************************************************************/
+
+// STYPE / VW / Vector absolute value words.
+def Hexagon_A2_vabsw:
+ di_SInst_di <"vabsw", int_hexagon_A2_vabsw>;
+def Hexagon_A2_vabswsat:
+ di_SInst_di_sat <"vabsw", int_hexagon_A2_vabswsat>;
+
+// STYPE / VW / Vector shift words by immediate.
+// Rdd64=v[asl/asr/lsr]w(Rss64,Rt32)
+def Hexagon_S2_asl_i_vw:
+ di_SInst_disi <"vaslw", int_hexagon_S2_asl_i_vw>;
+def Hexagon_S2_asr_i_vw:
+ di_SInst_disi <"vasrw", int_hexagon_S2_asr_i_vw>;
+def Hexagon_S2_lsr_i_vw:
+ di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_i_vw>;
+
+// STYPE / VW / Vector shift words by register.
+// Rdd64=v[asl/asr/lsl/lsr]w(Rss64,Rt32)
+def Hexagon_S2_asl_r_vw:
+ di_SInst_disi <"vaslw", int_hexagon_S2_asl_r_vw>;
+def Hexagon_S2_asr_r_vw:
+ di_SInst_disi <"vasrw", int_hexagon_S2_asr_r_vw>;
+def Hexagon_S2_lsl_r_vw:
+ di_SInst_disi <"vlslw", int_hexagon_S2_lsl_r_vw>;
+def Hexagon_S2_lsr_r_vw:
+ di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_r_vw>;
+
+// STYPE / VW / Vector shift words with truncate and pack.
+def Hexagon_S2_asr_r_svw_trun:
+ si_SInst_disi <"vasrw", int_hexagon_S2_asr_r_svw_trun>;
+def Hexagon_S2_asr_i_svw_trun:
+ si_SInst_diu5 <"vasrw", int_hexagon_S2_asr_i_svw_trun>;
+
+include "HexagonIntrinsicsV3.td"
+include "HexagonIntrinsicsV4.td"
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsDerived.td b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
new file mode 100644
index 0000000..68eaf68
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
@@ -0,0 +1,29 @@
+//===-- HexagonIntrinsicsDerived.td - Derived intrinsics ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Multiply two 64-bit values and use the lower 64 bits of the result.
+//
+// Optimized using the multiply-accumulate intrinsics.
+//
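+// A sketch of the decomposition the pattern below implements, with each
+// 64-bit operand split into 32-bit halves (a = aH:aL, b = bH:bL):
+//   lo32(result) = lo32(aL*bL)
+//   hi32(result) = hi32(aL*bL) + lo32(aL*bH) + lo32(aH*bL)
+// MPYU64 produces aL*bL, and the two maci (multiply-accumulate) steps fold
+// the cross terms into its high word before COMBINE_rr reassembles the
+// 64-bit result.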
+def : Pat <(mul DoubleRegs:$src1, DoubleRegs:$src2),
+ (COMBINE_rr
+ (Hexagon_M2_maci
+ (Hexagon_M2_maci (EXTRACT_SUBREG (MPYU64 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg)),
+ subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)),
+ (EXTRACT_SUBREG (MPYU64 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg)),
+ subreg_loreg))>;
+
+
+
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV3.td b/lib/Target/Hexagon/HexagonIntrinsicsV3.td
new file mode 100644
index 0000000..2a54e62
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV3.td
@@ -0,0 +1,50 @@
+//=- HexagonIntrinsicsV3.td - Target Description for Hexagon -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V3 Compiler Intrinsics in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+
+
+
+// MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary.
+def Hexagon_M2_vrcmpys_s1:
+ di_MInst_disi_s1_sat <"vrcmpys", int_hexagon_M2_vrcmpys_s1>;
+def Hexagon_M2_vrcmpys_acc_s1:
+ di_MInst_didisi_acc_s1_sat <"vrcmpys", int_hexagon_M2_vrcmpys_acc_s1>;
+def Hexagon_M2_vrcmpys_s1rp:
+ si_MInst_disi_s1_rnd_sat <"vrcmpys", int_hexagon_M2_vrcmpys_s1rp>;
+
+
+
+
+/********************************************************************
+* MTYPE/VB *
+*********************************************************************/
+
+// MTYPE / VB / Vector reduce add unsigned bytes.
+def Hexagon_M2_vradduh:
+ si_MInst_didi <"vradduh", int_hexagon_M2_vradduh>;
+
+
+/********************************************************************
+* ALU64/ALU *
+*********************************************************************/
+
+// ALU64 / ALU / Add.
+def Hexagon_A2_addsp:
+ di_ALU64_sidi <"add", int_hexagon_A2_addsp>;
+def Hexagon_A2_addpsat:
+ di_ALU64_didi <"add", int_hexagon_A2_addpsat>;
+
+def Hexagon_A2_maxp:
+ di_ALU64_didi <"max", int_hexagon_A2_maxp>;
+def Hexagon_A2_maxup:
+ di_ALU64_didi <"maxu", int_hexagon_A2_maxup>;
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/lib/Target/Hexagon/HexagonIntrinsicsV4.td
new file mode 100644
index 0000000..dd28ebb
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV4.td
@@ -0,0 +1,369 @@
+//===- HexagonIntrinsicsV4.td - V4 Instruction intrinsics --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is populated based on the following specs:
+// Hexagon V4 Architecture Extensions
+// Application-Level Specification
+// 80-V9418-12 Rev. A
+// June 15, 2010
+
+
+//
+// ALU 32 types.
+//
+
+class si_ALU32_sisi_not<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, ~$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_ALU32_s8si<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs DoubleRegs:$dst), (ins s8Imm:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "(#$src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID imm:$src1, IntRegs:$src2))]>;
+
+class di_ALU32_sis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_neg_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_neg_ALU32_sis10<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_neg_ALU32_siu9<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u9Imm:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_neg_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_neg_ALU32_sis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_ALU32_sis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+
+//
+// SInst Classes.
+//
+class qi_neg_SInst_qiqi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_SInst_qi_andqiqi_neg<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, and($src2, !$src3)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class qi_SInst_qi_andqiqi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, and($src2, $src3)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class qi_SInst_qi_orqiqi_neg<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, or($src2, !$src3)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class qi_SInst_qi_orqiqi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, or($src2, $src3)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_SInst_si_addsis6<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s6Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, add($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ imm:$src3))]>;
+
+class si_SInst_si_subs6si<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, sub(#$src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2,
+ IntRegs:$src3))]>;
+
+class di_ALU64_didi_neg<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, ~$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class di_MInst_dididi_xacc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_and<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst &= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_MInst_sisisi_andn<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst &= ", !strconcat(opc , "($src2, ~$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_SInst_sisis10_andi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s10Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, and($src2, #$src3))")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ imm:$src3))]>;
+
+class si_MInst_sisisi_xor<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_MInst_sisisi_xorn<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src2, ~$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_SInst_sisis10_or<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, s10Imm:$src3),
+ !strconcat("$dst |= ", !strconcat(opc , "($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ imm:$src3))]>;
+
+class si_MInst_sisisi_or<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst |= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_MInst_sisisi_orn<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst |= ", !strconcat(opc , "($src2, ~$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_SInst_siu5_sat<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+
+/********************************************************************
+* ALU32/ALU *
+*********************************************************************/
+
+// ALU32 / ALU / Logical Operations.
+def Hexagon_A4_orn : si_ALU32_sisi_not <"or", int_hexagon_A4_orn>;
+def Hexagon_A4_andn : si_ALU32_sisi_not <"and", int_hexagon_A4_andn>;
+
+
+/********************************************************************
+* ALU32/PERM *
+*********************************************************************/
+
+// ALU32 / PERM / Combine Words Into Doublewords.
+def Hexagon_A4_combineir : di_ALU32_s8si <"combine", int_hexagon_A4_combineir>;
+def Hexagon_A4_combineri : di_ALU32_sis8 <"combine", int_hexagon_A4_combineri>;
+
+
+/********************************************************************
+* ALU32/PRED *
+*********************************************************************/
+
+// ALU32 / PRED / Conditional Shift Halfword.
+// ALU32 / PRED / Conditional Sign Extend.
+// ALU32 / PRED / Conditional Zero Extend.
+// ALU32 / PRED / Compare.
+def Hexagon_C4_cmpneq : qi_neg_ALU32_sisi <"cmp.eq", int_hexagon_C4_cmpneq>;
+def Hexagon_C4_cmpneqi : qi_neg_ALU32_sis10 <"cmp.eq", int_hexagon_C4_cmpneqi>;
+def Hexagon_C4_cmplte : qi_neg_ALU32_sisi <"cmp.gt", int_hexagon_C4_cmplte>;
+def Hexagon_C4_cmpltei : qi_neg_ALU32_sis10 <"cmp.gt", int_hexagon_C4_cmpltei>;
+def Hexagon_C4_cmplteu : qi_neg_ALU32_sisi <"cmp.gtu",int_hexagon_C4_cmplteu>;
+def Hexagon_C4_cmplteui: qi_neg_ALU32_siu9 <"cmp.gtu",int_hexagon_C4_cmplteui>;
+
+// ALU32 / PRED / Compare To General Register.
+def Hexagon_A4_rcmpneq : si_neg_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpneq>;
+def Hexagon_A4_rcmpneqi: si_neg_ALU32_sis8 <"cmp.eq", int_hexagon_A4_rcmpneqi>;
+def Hexagon_A4_rcmpeq : si_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpeq>;
+def Hexagon_A4_rcmpeqi : si_ALU32_sis8 <"cmp.eq", int_hexagon_A4_rcmpeqi>;
+
+
+/********************************************************************
+* CR *
+*********************************************************************/
+
+// CR / Corner Detection Acceleration.
+def Hexagon_C4_fastcorner9:
+ qi_SInst_qiqi<"fastcorner9", int_hexagon_C4_fastcorner9>;
+def Hexagon_C4_fastcorner9_not:
+ qi_neg_SInst_qiqi<"fastcorner9",int_hexagon_C4_fastcorner9_not>;
+
+// CR / Logical Operations On Predicates.
+def Hexagon_C4_and_andn:
+ qi_SInst_qi_andqiqi_neg <"and", int_hexagon_C4_and_andn>;
+def Hexagon_C4_and_and:
+ qi_SInst_qi_andqiqi <"and", int_hexagon_C4_and_and>;
+def Hexagon_C4_and_orn:
+ qi_SInst_qi_orqiqi_neg <"and", int_hexagon_C4_and_orn>;
+def Hexagon_C4_and_or:
+ qi_SInst_qi_orqiqi <"and", int_hexagon_C4_and_or>;
+def Hexagon_C4_or_andn:
+ qi_SInst_qi_andqiqi_neg <"or", int_hexagon_C4_or_andn>;
+def Hexagon_C4_or_and:
+ qi_SInst_qi_andqiqi <"or", int_hexagon_C4_or_and>;
+def Hexagon_C4_or_orn:
+ qi_SInst_qi_orqiqi_neg <"or", int_hexagon_C4_or_orn>;
+def Hexagon_C4_or_or:
+ qi_SInst_qi_orqiqi <"or", int_hexagon_C4_or_or>;
+
+
+/********************************************************************
+* XTYPE/ALU *
+*********************************************************************/
+
+// XTYPE / ALU / Add And Accumulate.
+def Hexagon_S4_addaddi:
+ si_SInst_si_addsis6 <"add", int_hexagon_S4_addaddi>;
+def Hexagon_S4_subaddi:
+ si_SInst_si_subs6si <"add", int_hexagon_S4_subaddi>;
+
+// XTYPE / ALU / Logical Doublewords.
+def Hexagon_S4_andnp:
+ di_ALU64_didi_neg <"and", int_hexagon_A4_andnp>;
+def Hexagon_S4_ornp:
+ di_ALU64_didi_neg <"or", int_hexagon_A4_ornp>;
+
+// XTYPE / ALU / Logical-logical Doublewords.
+def Hexagon_M4_xor_xacc:
+ di_MInst_dididi_xacc <"xor", int_hexagon_M4_xor_xacc>;
+
+// XTYPE / ALU / Logical-logical Words.
+def HEXAGON_M4_and_and:
+ si_MInst_sisisi_and <"and", int_hexagon_M4_and_and>;
+def HEXAGON_M4_and_or:
+ si_MInst_sisisi_and <"or", int_hexagon_M4_and_or>;
+def HEXAGON_M4_and_xor:
+ si_MInst_sisisi_and <"xor", int_hexagon_M4_and_xor>;
+def HEXAGON_M4_and_andn:
+ si_MInst_sisisi_andn <"and", int_hexagon_M4_and_andn>;
+def HEXAGON_M4_xor_and:
+ si_MInst_sisisi_xor <"and", int_hexagon_M4_xor_and>;
+def HEXAGON_M4_xor_or:
+ si_MInst_sisisi_xor <"or", int_hexagon_M4_xor_or>;
+def HEXAGON_M4_xor_andn:
+ si_MInst_sisisi_xorn <"and", int_hexagon_M4_xor_andn>;
+def HEXAGON_M4_or_and:
+ si_MInst_sisisi_or <"and", int_hexagon_M4_or_and>;
+def HEXAGON_M4_or_or:
+ si_MInst_sisisi_or <"or", int_hexagon_M4_or_or>;
+def HEXAGON_M4_or_xor:
+ si_MInst_sisisi_or <"xor", int_hexagon_M4_or_xor>;
+def HEXAGON_M4_or_andn:
+ si_MInst_sisisi_orn <"and", int_hexagon_M4_or_andn>;
+def HEXAGON_S4_or_andix:
+ si_SInst_sisis10_andi <"or", int_hexagon_S4_or_andix>;
+def HEXAGON_S4_or_andi:
+ si_SInst_sisis10_or <"and", int_hexagon_S4_or_andi>;
+def HEXAGON_S4_or_ori:
+ si_SInst_sisis10_or <"or", int_hexagon_S4_or_ori>;
+
+// XTYPE / ALU / Modulo wrap.
+def HEXAGON_A4_modwrapu:
+ si_ALU64_sisi <"modwrap", int_hexagon_A4_modwrapu>;
+
+// XTYPE / ALU / Round.
+def HEXAGON_A4_cround_ri:
+ si_SInst_siu5 <"cround", int_hexagon_A4_cround_ri>;
+def HEXAGON_A4_cround_rr:
+ si_SInst_sisi <"cround", int_hexagon_A4_cround_rr>;
+def HEXAGON_A4_round_ri:
+ si_SInst_siu5 <"round", int_hexagon_A4_round_ri>;
+def HEXAGON_A4_round_rr:
+ si_SInst_sisi <"round", int_hexagon_A4_round_rr>;
+def HEXAGON_A4_round_ri_sat:
+ si_SInst_siu5_sat <"round", int_hexagon_A4_round_ri_sat>;
+def HEXAGON_A4_round_rr_sat:
+ si_SInst_sisi_sat <"round", int_hexagon_A4_round_rr_sat>;
+
+// XTYPE / ALU / Vector reduce add unsigned halfwords.
+// XTYPE / ALU / Vector add bytes.
+// XTYPE / ALU / Vector conditional negate.
+// XTYPE / ALU / Vector maximum bytes.
+// XTYPE / ALU / Vector reduce maximum halfwords.
+// XTYPE / ALU / Vector reduce maximum words.
+// XTYPE / ALU / Vector minimum bytes.
+// XTYPE / ALU / Vector reduce minimum halfwords.
+// XTYPE / ALU / Vector reduce minimum words.
+// XTYPE / ALU / Vector subtract bytes.
+
+
+/********************************************************************
+* XTYPE/BIT *
+*********************************************************************/
+
+// XTYPE / BIT / Count leading.
+// XTYPE / BIT / Count trailing.
+// XTYPE / BIT / Extract bitfield.
+// XTYPE / BIT / Masked parity.
+// XTYPE / BIT / Bit reverse.
+// XTYPE / BIT / Split bitfield.
+
+
+/********************************************************************
+* XTYPE/COMPLEX *
+*********************************************************************/
+
+// XTYPE / COMPLEX / Complex add/sub halfwords.
+// XTYPE / COMPLEX / Complex add/sub words.
+// XTYPE / COMPLEX / Complex multiply 32x16.
+// XTYPE / COMPLEX / Vector reduce complex rotate.
+
+
+/********************************************************************
+* XTYPE/MPY *
+*********************************************************************/
+
+// XTYPE / COMPLEX / Complex add/sub halfwords.
diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
new file mode 100644
index 0000000..0318c51
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
@@ -0,0 +1,75 @@
+//=- HexagonMachineFunctionInfo.h - Hexagon machine function info -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonMACHINEFUNCTIONINFO_H
+#define HexagonMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+ namespace Hexagon {
+ const unsigned int StartPacket = 0x1;
+ const unsigned int EndPacket = 0x2;
+ }
+
+
+/// Hexagon target-specific information for each MachineFunction.
+class HexagonMachineFunctionInfo : public MachineFunctionInfo {
+ // SRetReturnReg - Some subtargets require that sret lowering includes
+ // returning the value of the returned struct in a register. This field
+ // holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+ std::vector<MachineInstr*> AllocaAdjustInsts;
+ int VarArgsFrameIndex;
+ bool HasClobberLR;
+
+ std::map<const MachineInstr*, unsigned> PacketInfo;
+
+
+public:
+ HexagonMachineFunctionInfo() : SRetReturnReg(0), HasClobberLR(0) {}
+
+ HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0),
+ HasClobberLR(0) {}
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
+ void addAllocaAdjustInst(MachineInstr* MI) {
+ AllocaAdjustInsts.push_back(MI);
+ }
+ const std::vector<MachineInstr*>& getAllocaAdjustInsts() {
+ return AllocaAdjustInsts;
+ }
+
+ void setVarArgsFrameIndex(int v) { VarArgsFrameIndex = v; }
+ int getVarArgsFrameIndex() { return VarArgsFrameIndex; }
+
+ void setStartPacket(MachineInstr* MI) {
+ PacketInfo[MI] |= Hexagon::StartPacket;
+ }
+ void setEndPacket(MachineInstr* MI) {
+ PacketInfo[MI] |= Hexagon::EndPacket;
+ }
+ bool isStartPacket(const MachineInstr* MI) const {
+ return (PacketInfo.count(MI) &&
+ (PacketInfo.find(MI)->second & Hexagon::StartPacket));
+ }
+ bool isEndPacket(const MachineInstr* MI) const {
+ return (PacketInfo.count(MI) &&
+ (PacketInfo.find(MI)->second & Hexagon::EndPacket));
+ }
+ void setHasClobberLR(bool v) { HasClobberLR = v; }
+ bool hasClobberLR() const { return HasClobberLR; }
+
+};
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonOptimizeSZExtends.cpp b/lib/Target/Hexagon/HexagonOptimizeSZExtends.cpp
new file mode 100644
index 0000000..1229aca
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonOptimizeSZExtends.cpp
@@ -0,0 +1,129 @@
+//===-- HexagonOptimizeSZExtends.cpp - Identify and remove sign and -------===//
+//===-- zero extends. -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/PassSupport.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <algorithm>
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+ struct HexagonOptimizeSZExtends : public MachineFunctionPass {
+
+ public:
+ static char ID;
+ HexagonOptimizeSZExtends() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const {
+      return "Hexagon remove redundant zero and sign extends";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addPreserved<MachineFunctionAnalysis>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ };
+}
+
+char HexagonOptimizeSZExtends::ID = 0;
+
+// This is a brain dead pass to get rid of redundant sign extends for the
+// following case:
+//
+// Transform the following pattern
+// %vreg170<def> = SXTW %vreg166
+// ...
+// %vreg176<def> = COPY %vreg170:subreg_loreg
+//
+// Into
+// %vreg176<def> = COPY %vreg166
+
+bool HexagonOptimizeSZExtends::runOnMachineFunction(MachineFunction &MF) {
+ DenseMap<unsigned, unsigned> SExtMap;
+
+ // Loop over all of the basic blocks
+ for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock* MBB = MBBb;
+ SExtMap.clear();
+
+ // Traverse the basic block.
+ for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
+ ++MII) {
+ MachineInstr *MI = MII;
+ // Look for sign extends:
+ // %vreg170<def> = SXTW %vreg166
+ if (MI->getOpcode() == Hexagon::SXTW) {
+ assert (MI->getNumOperands() == 2);
+ MachineOperand &Dst = MI->getOperand(0);
+ MachineOperand &Src = MI->getOperand(1);
+ unsigned DstReg = Dst.getReg();
+ unsigned SrcReg = Src.getReg();
+ // Just handle virtual registers.
+ if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ // Map the following:
+ // %vreg170<def> = SXTW %vreg166
+ // SExtMap[170] = vreg166
+ SExtMap[DstReg] = SrcReg;
+ }
+ }
+ // Look for copy:
+ // %vreg176<def> = COPY %vreg170:subreg_loreg
+ if (MI->isCopy()) {
+ assert (MI->getNumOperands() == 2);
+ MachineOperand &Dst = MI->getOperand(0);
+ MachineOperand &Src = MI->getOperand(1);
+
+ // Make sure we are copying the lower 32 bits.
+ if (Src.getSubReg() != Hexagon::subreg_loreg)
+ continue;
+
+ unsigned DstReg = Dst.getReg();
+ unsigned SrcReg = Src.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ // Try to find in the map.
+ if (unsigned SextSrc = SExtMap.lookup(SrcReg)) {
+ // Change the 1st operand.
+ MI->RemoveOperand(1);
+ MI->addOperand(MachineOperand::CreateReg(SextSrc, false));
+ }
+ }
+ }
+ }
+ }
+ return true;
+}
+
+FunctionPass *llvm::createHexagonOptimizeSZExtends() {
+ return new HexagonOptimizeSZExtends();
+}
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
new file mode 100644
index 0000000..521e0c1
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -0,0 +1,323 @@
+//==- HexagonRegisterInfo.cpp - Hexagon Register Information -----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <iostream>
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Function.h"
+using namespace llvm;
+
+
+HexagonRegisterInfo::HexagonRegisterInfo(HexagonSubtarget &st,
+ const HexagonInstrInfo &tii)
+ : HexagonGenRegisterInfo(Hexagon::R31),
+ Subtarget(st),
+ TII(tii) {
+}
+
+const unsigned* HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction
+ *MF)
+ const {
+ static const unsigned CalleeSavedRegsV2[] = {
+ Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0
+ };
+ static const unsigned CalleeSavedRegsV3[] = {
+ Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19,
+ Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23,
+ Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0
+ };
+
+ switch(Subtarget.getHexagonArchVersion()) {
+ case HexagonSubtarget::V2:
+ return CalleeSavedRegsV2;
+ break;
+ case HexagonSubtarget::V3:
+ case HexagonSubtarget::V4:
+ return CalleeSavedRegsV3;
+ break;
+ default:
+ const char *ErrorString =
+     "Callee saved registers requested for unknown architecture version";
+ llvm_unreachable(ErrorString);
+ }
+}
+
+BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF)
+ const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(HEXAGON_RESERVED_REG_1);
+ Reserved.set(HEXAGON_RESERVED_REG_2);
+ Reserved.set(Hexagon::R29);
+ Reserved.set(Hexagon::R30);
+ Reserved.set(Hexagon::R31);
+ Reserved.set(Hexagon::D14);
+ Reserved.set(Hexagon::D15);
+ Reserved.set(Hexagon::LC0);
+ Reserved.set(Hexagon::LC1);
+ Reserved.set(Hexagon::SA0);
+ Reserved.set(Hexagon::SA1);
+ return Reserved;
+}
+
+
+const TargetRegisterClass* const*
+HexagonRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRegClassesV2[] = {
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ };
+ static const TargetRegisterClass * const CalleeSavedRegClassesV3[] = {
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ };
+
+ switch(Subtarget.getHexagonArchVersion()) {
+ case HexagonSubtarget::V2:
+ return CalleeSavedRegClassesV2;
+ break;
+ case HexagonSubtarget::V3:
+ case HexagonSubtarget::V4:
+ return CalleeSavedRegClassesV3;
+ break;
+ default:
+ const char *ErrorString =
+   "Callee saved register classes requested for unknown architecture version";
+ llvm_unreachable(ErrorString);
+ }
+}
+
+void HexagonRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ MachineInstr &MI = *I;
+
+ if (MI.getOpcode() == Hexagon::ADJCALLSTACKDOWN) {
+ // Hexagon_TODO: add code
+ } else if (MI.getOpcode() == Hexagon::ADJCALLSTACKUP) {
+ // Hexagon_TODO: add code
+ } else {
+ assert(0 && "Cannot handle this call frame pseudo instruction");
+ }
+ MBB.erase(I);
+}
+
+void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+
+ //
+ // Hexagon_TODO: Do we need to enforce this for Hexagon?
+ assert(SPAdj == 0 && "Unexpected");
+
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+ // Addressable stack objects are accessed using neg. offsets from %fp.
+ MachineFunction &MF = *MI.getParent()->getParent();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ unsigned FrameReg = getFrameRegister(MF);
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ if (!TFI->hasFP(MF)) {
+ // We will not reserve space on the stack for the lr and fp registers.
+ Offset -= 2 * Hexagon_WordSize;
+ }
+
+ const unsigned FrameSize = MFI.getStackSize();
+
+ if (!MFI.hasVarSizedObjects() &&
+ TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset)) &&
+ !TII.isSpillPredRegOp(&MI)) {
+ // Replace frame index with a stack pointer reference.
+ MI.getOperand(i).ChangeToRegister(getStackRegister(), false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(FrameSize+Offset);
+ } else {
+ // Replace frame index with a frame pointer reference.
+ if (!TII.isValidOffset(MI.getOpcode(), Offset)) {
+
+ // If the offset overflows, then correct it.
+ //
+ // For loads, we do not need a reserved register
+ // r0 = memw(r30 + #10000) to:
+ //
+ // r0 = add(r30, #10000)
+ // r0 = memw(r0)
+ if ( (MI.getOpcode() == Hexagon::LDriw) ||
+ (MI.getOpcode() == Hexagon::LDrid) ||
+ (MI.getOpcode() == Hexagon::LDrih) ||
+ (MI.getOpcode() == Hexagon::LDriuh) ||
+ (MI.getOpcode() == Hexagon::LDrib) ||
+ (MI.getOpcode() == Hexagon::LDriub) ) {
+ unsigned dstReg = (MI.getOpcode() == Hexagon::LDrid) ?
+ *getSubRegisters(MI.getOperand(0).getReg()) :
+ MI.getOperand(0).getReg();
+
+ // Check if offset can fit in addi.
+ if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset);
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_rr),
+ dstReg).addReg(FrameReg).addReg(dstReg);
+ } else {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_ri),
+ dstReg).addReg(FrameReg).addImm(Offset);
+ }
+
+ MI.getOperand(i).ChangeToRegister(dstReg, false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(0);
+ } else if ((MI.getOpcode() == Hexagon::STriw) ||
+ (MI.getOpcode() == Hexagon::STrid) ||
+ (MI.getOpcode() == Hexagon::STrih) ||
+ (MI.getOpcode() == Hexagon::STrib) ||
+ (MI.getOpcode() == Hexagon::STriwt)) {
+ // For stores, we need a reserved register. Change
+ // memw(r30 + #10000) = r0 to:
+ //
+ // rs = add(r30, #10000);
+ // memw(rs) = r0
+ unsigned resReg = HEXAGON_RESERVED_REG_1;
+
+ // Check if offset can fit in addi.
+ if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset);
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_rr),
+ resReg).addReg(FrameReg).addReg(resReg);
+ } else {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_ri),
+ resReg).addReg(FrameReg).addImm(Offset);
+ }
+ MI.getOperand(i).ChangeToRegister(resReg, false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(0);
+ } else if (TII.isMemOp(&MI)) {
+ unsigned resReg = HEXAGON_RESERVED_REG_1;
+ if (!MFI.hasVarSizedObjects() &&
+ TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) {
+ MI.getOperand(i).ChangeToRegister(getStackRegister(), false, false,
+ true);
+ MI.getOperand(i+1).ChangeToImmediate(FrameSize+Offset);
+ } else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset);
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_rr),
+ resReg).addReg(FrameReg).addReg(resReg);
+ MI.getOperand(i).ChangeToRegister(resReg, false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(0);
+ } else {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_ri),
+ resReg).addReg(FrameReg).addImm(Offset);
+ MI.getOperand(i).ChangeToRegister(resReg, false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(0);
+ }
+ } else {
+ unsigned dstReg = MI.getOperand(0).getReg();
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset);
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_rr),
+ dstReg).addReg(FrameReg).addReg(dstReg);
+ // Can we delete MI??? r2 = add (r2, #0).
+ MI.getOperand(i).ChangeToRegister(dstReg, false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(0);
+ }
+ } else {
+ // If the offset is small enough to fit in the immediate field, directly
+ // encode it.
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ }
+ }
+
+}
+
+unsigned HexagonRegisterInfo::getRARegister() const {
+ return Hexagon::R31;
+}
+
+unsigned HexagonRegisterInfo::getFrameRegister(const MachineFunction
+ &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ if (TFI->hasFP(MF)) {
+ return Hexagon::R30;
+ }
+
+ return Hexagon::R29;
+}
+
+unsigned HexagonRegisterInfo::getFrameRegister() const {
+ return Hexagon::R30;
+}
+
+unsigned HexagonRegisterInfo::getStackRegister() const {
+ return Hexagon::R29;
+}
+
+void HexagonRegisterInfo::getInitialFrameState(std::vector<MachineMove>
+ &Moves) const
+{
+ // VirtualFP = (R30 + #0).
+ unsigned FPReg = getFrameRegister();
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(FPReg, 0);
+ Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+unsigned HexagonRegisterInfo::getEHExceptionRegister() const {
+ assert(0 && "What is the exception register");
+ return 0;
+}
+
+unsigned HexagonRegisterInfo::getEHHandlerRegister() const {
+ assert(0 && "What is the exception handler register");
+ return 0;
+}
+
+#define GET_REGINFO_TARGET_DESC
+#include "HexagonGenRegisterInfo.inc"
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
new file mode 100644
index 0000000..33b0c14
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -0,0 +1,89 @@
+//==- HexagonRegisterInfo.h - Hexagon Register Information Impl --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonREGISTERINFO_H
+#define HexagonREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#define GET_REGINFO_HEADER
+#include "HexagonGenRegisterInfo.inc"
+#include "llvm/MC/MachineLocation.h"
+
+//
+// We try not to hard code the reserved registers in our code, so the
+// following two macros are defined. However, there are still a few places
+// where R10 and R11 are hard wired; see the list below. If the reserved
+// registers are ever changed, don't forget to update the following places:
+//
+// 1. the "Defs" set of STriw_pred in HexagonInstrInfo.td
+// 2. the "Defs" set of LDri_pred in HexagonInstrInfo.td
+// 3. the definition of "IntRegs" in HexagonRegisterInfo.td
+// 4. the definition of "DoubleRegs" in HexagonRegisterInfo.td
+//
+#define HEXAGON_RESERVED_REG_1 Hexagon::R10
+#define HEXAGON_RESERVED_REG_2 Hexagon::R11
+
+namespace llvm {
+
+class HexagonSubtarget;
+class HexagonInstrInfo;
+class Type;
+
+struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
+ HexagonSubtarget &Subtarget;
+ const HexagonInstrInfo &TII;
+
+ HexagonRegisterInfo(HexagonSubtarget &st, const HexagonInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ const TargetRegisterClass* const* getCalleeSavedRegClasses(
+ const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ /// determineFrameLayout - Determine the size of the frame and maximum call
+ /// frame size.
+ void determineFrameLayout(MachineFunction &MF) const;
+
+ /// requiresRegisterScavenging - returns true since we may need scavenging for
+ /// a temporary register when generating hardware loop instructions.
+ bool requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+ unsigned getFrameRegister() const;
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+ unsigned getStackRegister() const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td
new file mode 100644
index 0000000..c05f844
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -0,0 +1,169 @@
+//===- HexagonRegisterInfo.td - Hexagon Register defs ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the Hexagon register file.
+//===----------------------------------------------------------------------===//
+
+class HexagonReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "Hexagon";
+}
+
+class HexagonDoubleReg<string n, list<Register> subregs> :
+ RegisterWithSubRegs<n, subregs> {
+ field bits<5> Num;
+ let Namespace = "Hexagon";
+}
+
+// Registers are identified with 5-bit ID numbers.
+// Ri - 32-bit integer registers.
+class Ri<bits<5> num, string n> : HexagonReg<n> {
+ let Num = num;
+}
+
+// Rf - 32-bit floating-point registers.
+class Rf<bits<5> num, string n> : HexagonReg<n> {
+ let Num = num;
+}
+
+
+// Rd - 64 bit registers.
+class Rd<bits<5> num, string n, list<Register> subregs> :
+HexagonDoubleReg<n, subregs> {
+ let Num = num;
+ let SubRegs = subregs;
+}
+
+
+class Rp<bits<5> num, string n> : HexagonReg<n> {
+ let Num = num;
+}
+
+class Rc<bits<5> num, string n> : HexagonReg<n> {
+ let Num = num;
+}
+
+let Namespace = "Hexagon" in {
+
+ def subreg_loreg : SubRegIndex;
+ def subreg_hireg : SubRegIndex;
+
+ // Integer registers.
+ def R0 : Ri< 0, "r0">, DwarfRegNum<[0]>;
+ def R1 : Ri< 1, "r1">, DwarfRegNum<[1]>;
+ def R2 : Ri< 2, "r2">, DwarfRegNum<[2]>;
+ def R3 : Ri< 3, "r3">, DwarfRegNum<[3]>;
+ def R4 : Ri< 4, "r4">, DwarfRegNum<[4]>;
+ def R5 : Ri< 5, "r5">, DwarfRegNum<[5]>;
+ def R6 : Ri< 6, "r6">, DwarfRegNum<[6]>;
+ def R7 : Ri< 7, "r7">, DwarfRegNum<[7]>;
+ def R8 : Ri< 8, "r8">, DwarfRegNum<[8]>;
+ def R9 : Ri< 9, "r9">, DwarfRegNum<[9]>;
+ def R10 : Ri<10, "r10">, DwarfRegNum<[10]>;
+ def R11 : Ri<11, "r11">, DwarfRegNum<[11]>;
+ def R12 : Ri<12, "r12">, DwarfRegNum<[12]>;
+ def R13 : Ri<13, "r13">, DwarfRegNum<[13]>;
+ def R14 : Ri<14, "r14">, DwarfRegNum<[14]>;
+ def R15 : Ri<15, "r15">, DwarfRegNum<[15]>;
+ def R16 : Ri<16, "r16">, DwarfRegNum<[16]>;
+ def R17 : Ri<17, "r17">, DwarfRegNum<[17]>;
+ def R18 : Ri<18, "r18">, DwarfRegNum<[18]>;
+ def R19 : Ri<19, "r19">, DwarfRegNum<[19]>;
+ def R20 : Ri<20, "r20">, DwarfRegNum<[20]>;
+ def R21 : Ri<21, "r21">, DwarfRegNum<[21]>;
+ def R22 : Ri<22, "r22">, DwarfRegNum<[22]>;
+ def R23 : Ri<23, "r23">, DwarfRegNum<[23]>;
+ def R24 : Ri<24, "r24">, DwarfRegNum<[24]>;
+ def R25 : Ri<25, "r25">, DwarfRegNum<[25]>;
+ def R26 : Ri<26, "r26">, DwarfRegNum<[26]>;
+ def R27 : Ri<27, "r27">, DwarfRegNum<[27]>;
+ def R28 : Ri<28, "r28">, DwarfRegNum<[28]>;
+ def R29 : Ri<29, "r29">, DwarfRegNum<[29]>;
+ def R30 : Ri<30, "r30">, DwarfRegNum<[30]>;
+ def R31 : Ri<31, "r31">, DwarfRegNum<[31]>;
+
+
+ def PC : Ri<31, "r31">, DwarfRegNum<[32]>;
+ def GP : Ri<31, "r31">, DwarfRegNum<[33]>;
+
+ // Aliases of the R* registers used to hold 64-bit int values (doubles).
+ let SubRegIndices = [subreg_loreg, subreg_hireg] in {
+ def D0 : Rd< 0, "r1:0", [R0, R1]>, DwarfRegNum<[32]>;
+ def D1 : Rd< 2, "r3:2", [R2, R3]>, DwarfRegNum<[34]>;
+ def D2 : Rd< 4, "r5:4", [R4, R5]>, DwarfRegNum<[36]>;
+ def D3 : Rd< 6, "r7:6", [R6, R7]>, DwarfRegNum<[38]>;
+ def D4 : Rd< 8, "r9:8", [R8, R9]>, DwarfRegNum<[40]>;
+ def D5 : Rd<10, "r11:10", [R10, R11]>, DwarfRegNum<[42]>;
+ def D6 : Rd<12, "r13:12", [R12, R13]>, DwarfRegNum<[44]>;
+ def D7 : Rd<14, "r15:14", [R14, R15]>, DwarfRegNum<[46]>;
+ def D8 : Rd<16, "r17:16", [R16, R17]>, DwarfRegNum<[48]>;
+ def D9 : Rd<18, "r19:18", [R18, R19]>, DwarfRegNum<[50]>;
+ def D10 : Rd<20, "r21:20", [R20, R21]>, DwarfRegNum<[52]>;
+ def D11 : Rd<22, "r23:22", [R22, R23]>, DwarfRegNum<[54]>;
+ def D12 : Rd<24, "r25:24", [R24, R25]>, DwarfRegNum<[56]>;
+ def D13 : Rd<26, "r27:26", [R26, R27]>, DwarfRegNum<[58]>;
+ def D14 : Rd<28, "r29:28", [R28, R29]>, DwarfRegNum<[60]>;
+ def D15 : Rd<30, "r31:30", [R30, R31]>, DwarfRegNum<[62]>;
+ }
+
+ // Predicate registers.
+ def P0 : Rp< 0, "p0">, DwarfRegNum<[63]>;
+ def P1 : Rp< 1, "p1">, DwarfRegNum<[64]>;
+ def P2 : Rp< 2, "p2">, DwarfRegNum<[65]>;
+ def P3 : Rp< 3, "p3">, DwarfRegNum<[66]>;
+
+ // Control registers.
+ def SA0 : Rc<0, "sa0">, DwarfRegNum<[67]>;
+ def LC0 : Rc<1, "lc0">, DwarfRegNum<[68]>;
+
+ def SA1 : Rc<2, "sa1">, DwarfRegNum<[69]>;
+ def LC1 : Rc<3, "lc1">, DwarfRegNum<[70]>;
+}
+
+// Register classes.
+//
+// FIXME: the register order should be defined in terms of the preferred
+// allocation order...
+//
+def IntRegs : RegisterClass<"Hexagon", [i32], 32, (add (sequence "R%u", 0, 9),
+ (sequence "R%u", 12, 28),
+ R10, R11, R29, R30,
+ R31)> {
+}
+
+
+
+def DoubleRegs : RegisterClass<"Hexagon", [i64], 64, (add (sequence "D%u", 0,
+ 4),
+ (sequence "D%u", 6, 13),
+ D5, D14, D15)> {
+ let SubRegClasses = [(IntRegs subreg_loreg, subreg_hireg)];
+}
+
+
+def PredRegs : RegisterClass<"Hexagon", [i1], 32, (add (sequence "P%u", 0, 3))>
+{
+ let Size = 32;
+}
+
+def CRRegs : RegisterClass<"Hexagon", [i32], 32, (add (sequence "LC%u", 0, 1),
+ (sequence "SA%u", 0, 1),
+ PC)> {
+ let Size = 32;
+}
diff --git a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
new file mode 100644
index 0000000..3ca257f
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
@@ -0,0 +1,85 @@
+//=- HexagonRemoveSZExtArgs.cpp - Remove unnecessary argument sign extends -=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Pass that removes sign extends for function parameters. These parameters
+// are already sign extended by the caller per the Hexagon ABI.
+//
+//===----------------------------------------------------------------------===//
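+//
+// For illustration, consider a callee such as (hypothetical IR, not taken
+// from this patch):
+//
+//   define i32 @f(i16 signext %a) {
+//     %e = sext i16 %a to i32
+//     %r = add i32 %e, 1
+//     ret i32 %r
+//   }
+//
+// Because the caller has already sign extended %a to 32 bits per the ABI, the
+// explicit sext conveys no new information; that is the redundancy this pass
+// targets.
+//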
+
+
+
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "HexagonTargetMachine.h"
+#include <iostream>
+
+using namespace llvm;
+namespace {
+ struct HexagonRemoveExtendArgs : public FunctionPass {
+ public:
+ static char ID;
+ HexagonRemoveExtendArgs() : FunctionPass(ID) {}
+ virtual bool runOnFunction(Function &F);
+
+ const char *getPassName() const {
+ return "Remove sign extends";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addPreserved<MachineFunctionAnalysis>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char HexagonRemoveExtendArgs::ID = 0;
+static RegisterPass<HexagonRemoveExtendArgs>
+X("reargs", "Remove Sign and Zero Extends for Args");
+
+
+
+bool HexagonRemoveExtendArgs::runOnFunction(Function &F) {
+ unsigned Idx = 1;
+ for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE;
+ ++AI, ++Idx) {
+ if (F.paramHasAttr(Idx, Attribute::SExt)) {
+ Argument* Arg = AI;
+ if (!isa<PointerType>(Arg->getType())) {
+ for (Instruction::use_iterator UI = Arg->use_begin();
+ UI != Arg->use_end();) {
+ if (isa<SExtInst>(*UI)) {
+ Instruction* Use = cast<Instruction>(*UI);
+ SExtInst* SI = new SExtInst(Arg, Use->getType());
+ assert (EVT::getEVT(SI->getType()) ==
+ (EVT::getEVT(Use->getType())));
+ ++UI;
+ Use->replaceAllUsesWith(SI);
+ Instruction* First = F.getEntryBlock().begin();
+ SI->insertBefore(First);
+ Use->eraseFromParent();
+ } else {
+ ++UI;
+ }
+ }
+ }
+ }
+ }
+ return true;
+}
+
+
+
+FunctionPass *llvm::createHexagonRemoveExtendOps(HexagonTargetMachine &TM) {
+ return new HexagonRemoveExtendArgs();
+}
diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td
new file mode 100644
index 0000000..427d1cb
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSchedule.td
@@ -0,0 +1,53 @@
+//===- HexagonSchedule.td - Hexagon Scheduling Definitions -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Functional Units
+def LUNIT : FuncUnit;
+def LSUNIT : FuncUnit;
+def MUNIT : FuncUnit;
+def SUNIT : FuncUnit;
+
+
+// Itinerary classes
+def ALU32 : InstrItinClass;
+def ALU64 : InstrItinClass;
+def CR : InstrItinClass;
+def J : InstrItinClass;
+def JR : InstrItinClass;
+def LD : InstrItinClass;
+def M : InstrItinClass;
+def ST : InstrItinClass;
+def S : InstrItinClass;
+def PSEUDO : InstrItinClass;
+
+
+def HexagonItineraries :
+ ProcessorItineraries<[LUNIT, LSUNIT, MUNIT, SUNIT], [], [
+ InstrItinData<ALU32 , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>,
+ InstrItinData<ALU64 , [InstrStage<1, [MUNIT, SUNIT]>]>,
+ InstrItinData<CR , [InstrStage<1, [SUNIT]>]>,
+ InstrItinData<J , [InstrStage<1, [SUNIT, MUNIT]>]>,
+ InstrItinData<JR , [InstrStage<1, [MUNIT]>]>,
+ InstrItinData<LD , [InstrStage<1, [LUNIT, LSUNIT]>]>,
+ InstrItinData<M , [InstrStage<1, [MUNIT, SUNIT]>]>,
+ InstrItinData<ST , [InstrStage<1, [LSUNIT]>]>,
+ InstrItinData<S , [InstrStage<1, [SUNIT, MUNIT]>]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// V4 Machine Info +
+//===----------------------------------------------------------------------===//
+
+include "HexagonScheduleV4.td"
+
+//===----------------------------------------------------------------------===//
+// V4 Machine Info -
+//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonScheduleV4.td b/lib/Target/Hexagon/HexagonScheduleV4.td
new file mode 100644
index 0000000..4cf66fe
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonScheduleV4.td
@@ -0,0 +1,56 @@
+//=-HexagonScheduleV4.td - HexagonV4 Scheduling Definitions --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// There are four SLOTS (four parallel pipelines) in the Hexagon V4 machine.
+// This file describes that machine information.
+
+//
+// |===========|==================================================|
+// | PIPELINE | Instruction Classes |
+// |===========|==================================================|
+// | SLOT0 | LD ST ALU32 MEMOP NV SYSTEM |
+// |-----------|--------------------------------------------------|
+// | SLOT1 | LD ST ALU32 |
+// |-----------|--------------------------------------------------|
+// | SLOT2 | XTYPE ALU32 J JR |
+// |-----------|--------------------------------------------------|
+// | SLOT3 | XTYPE ALU32 J CR |
+// |===========|==================================================|
+
+
+// Functional Units.
+def SLOT0 : FuncUnit;
+def SLOT1 : FuncUnit;
+def SLOT2 : FuncUnit;
+def SLOT3 : FuncUnit;
+
+// Itinerary classes.
+def NV_V4 : InstrItinClass;
+def MEM_V4 : InstrItinClass;
+// ALU64/M/S Instruction classes of V2 are collectively known as XTYPE in V4.
+
+def HexagonItinerariesV4 : ProcessorItineraries<
+ [SLOT0, SLOT1, SLOT2, SLOT3], [], [
+ InstrItinData<LD , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<ST , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<ALU32 , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<NV_V4 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<MEM_V4 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<J , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<JR , [InstrStage<1, [SLOT2]>]>,
+ InstrItinData<CR , [InstrStage<1, [SLOT3]>]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<M , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S , [InstrStage<1, [SLOT2, SLOT3]>]>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Hexagon V4 Resource Definitions -
+//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonSelectCCInfo.td b/lib/Target/Hexagon/HexagonSelectCCInfo.td
new file mode 100644
index 0000000..f21d928
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSelectCCInfo.td
@@ -0,0 +1,121 @@
+//=-HexagonSelectCCInfo.td - Selectcc mappings ---------------*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+//
+// selectcc mappings.
+//
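+// Each mapping below lowers selectcc(lhs, rhs, tval, fval, cond) to a compare
+// that produces an i1 predicate, which then drives MUX_rr to select between
+// tval and fval (a pair of muxes for 64-bit values).
+//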
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETEQ)),
+ (i32 (MUX_rr (i1 (CMPEQrr IntRegs:$lhs, IntRegs:$rhs)),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETNE)),
+ (i32 (MUX_rr (i1 (NOT_Ps (CMPEQrr IntRegs:$lhs, IntRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETGT)),
+ (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$lhs, IntRegs:$rhs)),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETUGT)),
+ (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs)),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETULT)),
+ (i32 (MUX_rr (i1 (NOT_Ps (CMPGTUrr IntRegs:$lhs,
+ (ADD_ri IntRegs:$rhs, -1)))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETLT)),
+ (i32 (MUX_rr (i1 (NOT_Ps (CMPGTrr IntRegs:$lhs,
+ (ADD_ri IntRegs:$rhs, -1)))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETLE)),
+ (i32 (MUX_rr (i1 (NOT_Ps (CMPGTrr IntRegs:$lhs, IntRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETULE)),
+ (i32 (MUX_rr (i1 (NOT_Ps (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+
+//
+// selectcc mappings for greater-equal-to Rs => greater-than Rs-1.
+//
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETGE)),
+ (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETUGE)),
+ (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+
+
+//
+// selectcc mappings for predicate comparisons.
+//
+// Convert Rd = selectcc(p0, p1, true_val, false_val, SETEQ) into:
+// pt = not(p1 xor p2)
+// Rd = mux(pt, true_val, false_val)
+// and similarly for SETNE
+//
+def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETNE)),
+ (i32 (MUX_rr (i1 (XOR_pp PredRegs:$lhs, PredRegs:$rhs)), IntRegs:$tval,
+ IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETEQ)),
+ (i32 (MUX_rr (i1 (NOT_pp (XOR_pp PredRegs:$lhs, PredRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+
+//
+// selectcc mappings for 64-bit operands are messy. Hexagon does not have a
+// MUX64, so use this instead:
+// selectcc(Rss, Rdd, tval, fval, cond) ->
+// combine(mux(cmp_cond(Rss, Rdd), tval.hi, fval.hi),
+// mux(cmp_cond(Rss, Rdd), tval.lo, fval.lo))
+
+// setgt-64.
+def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval,
+ DoubleRegs:$fval, SETGT)),
+ (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs),
+ (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)),
+ (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs),
+ (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>;
+
+
+// setlt-64 -> setgt-64.
+def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval,
+ DoubleRegs:$fval, SETLT)),
+ (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$lhs,
+ (ADD64_rr DoubleRegs:$rhs, (TFRI64 -1))),
+ (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)),
+ (MUX_rr (CMPGT64rr DoubleRegs:$lhs,
+ (ADD64_rr DoubleRegs:$rhs, (TFRI64 -1))),
+ (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>;
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
new file mode 100644
index 0000000..a52c604
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
@@ -0,0 +1,46 @@
+//===-- HexagonSelectionDAGInfo.cpp - Hexagon SelectionDAG Info -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the HexagonSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-selectiondag-info"
+#include "HexagonTargetMachine.h"
+using namespace llvm;
+
+bool llvm::flag_aligned_memcpy;
+
+HexagonSelectionDAGInfo::HexagonSelectionDAGInfo(const HexagonTargetMachine
+ &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+HexagonSelectionDAGInfo::~HexagonSelectionDAGInfo() {
+}
+
+SDValue
+HexagonSelectionDAGInfo::
+EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain,
+ SDValue Dst, SDValue Src, SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
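+  // Mark memcpys that are at least word (4-byte) aligned, larger than 32
+  // bytes, and a multiple of 8 bytes in size; the flag is consumed later in
+  // lowering (presumably to pick a specialized aligned copy sequence).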
+ flag_aligned_memcpy = false;
+ if ((Align & 0x3) == 0) {
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ if ((SizeVal > 32) && ((SizeVal % 8) == 0))
+ flag_aligned_memcpy = true;
+ }
+ }
+
+ return SDValue();
+}
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
new file mode 100644
index 0000000..86fa026
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
@@ -0,0 +1,40 @@
+//=-- HexagonSelectionDAGInfo.h - Hexagon SelectionDAG Info ------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Hexagon subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonSELECTIONDAGINFO_H
+#define HexagonSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class HexagonTargetMachine;
+
+class HexagonSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit HexagonSelectionDAGInfo(const HexagonTargetMachine &TM);
+ ~HexagonSelectionDAGInfo();
+
+ virtual
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
new file mode 100644
index 0000000..f4d3647
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
@@ -0,0 +1,136 @@
+//===---- HexagonSplitTFRCondSets.cpp - split TFR condsets into xfers -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass tries to provide opportunities for better optimization of muxes.
+// The default code generated for something like: flag = (a == b) ? 1 : 3;
+// would be:
+//
+// {p0 = cmp.eq(r0,r1)}
+// {r3 = mux(p0,#1,#3)}
+//
+// This requires two packets. If we use .new predicated immediate transfers,
+// then we can do this in a single packet, e.g.:
+//
+// {p0 = cmp.eq(r0,r1)
+// if (p0.new) r3 = #1
+// if (!p0.new) r3 = #3}
+//
+// Note that the conditional assignments are not generated in .new form here.
+// We optimistically assume that they will be formed later.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "xfer"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
+#include "HexagonMachineFunctionInfo.h"
+#include <map>
+#include <iostream>
+
+#include "llvm/Support/CommandLine.h"
+
+
+using namespace llvm;
+
+namespace {
+
+class HexagonSplitTFRCondSets : public MachineFunctionPass {
+ HexagonTargetMachine& QTM;
+ const HexagonSubtarget &QST;
+
+ public:
+ static char ID;
+ HexagonSplitTFRCondSets(HexagonTargetMachine& TM) :
+ MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {}
+
+ const char *getPassName() const {
+ return "Hexagon Split TFRCondSets";
+ }
+ bool runOnMachineFunction(MachineFunction &Fn);
+};
+
+
+char HexagonSplitTFRCondSets::ID = 0;
+
+
+bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) {
+
+ const TargetInstrInfo *TII = QTM.getInstrInfo();
+
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock* MBB = MBBb;
+ // Traverse the basic block.
+ for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
+ ++MII) {
+ MachineInstr *MI = MII;
+ int Opc = MI->getOpcode();
+ if (Opc == Hexagon::TFR_condset_rr) {
+
+ int DestReg = MI->getOperand(0).getReg();
+ int SrcReg1 = MI->getOperand(2).getReg();
+ int SrcReg2 = MI->getOperand(3).getReg();
+
+        // Minor optimization: do not emit the predicated copy if the source
+        // and the destination are the same register.
+ if (DestReg != SrcReg1) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_cPt),
+ DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg1);
+ }
+ if (DestReg != SrcReg2) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_cNotPt),
+ DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg2);
+ }
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::TFR_condset_ii) {
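+        // Expand into a pair of predicated immediate transfers guarded by the
+        // predicate in operand 1: one for the true value, one for the false
+        // value.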
+ int DestReg = MI->getOperand(0).getReg();
+ int SrcReg1 = MI->getOperand(1).getReg();
+ int Immed1 = MI->getOperand(2).getImm();
+ int Immed2 = MI->getOperand(3).getImm();
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFRI_cPt),
+ DestReg).addReg(SrcReg1).addImm(Immed1);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFRI_cNotPt),
+ DestReg).addReg(SrcReg1).addImm(Immed2);
+ MII = MBB->erase(MI);
+ --MII;
+ }
+ }
+ }
+
+ return true;
+}
+
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonSplitTFRCondSets(HexagonTargetMachine &TM) {
+ return new HexagonSplitTFRCondSets(TM);
+}
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
new file mode 100644
index 0000000..83fb498
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -0,0 +1,59 @@
+//===- HexagonSubtarget.cpp - Hexagon Subtarget Information ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Hexagon specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonSubtarget.h"
+#include "Hexagon.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+#define GET_SUBTARGETINFO_CTOR
+#define GET_SUBTARGETINFO_TARGET_DESC
+#include "HexagonGenSubtargetInfo.inc"
+
+static cl::opt<bool>
+EnableV3("enable-hexagon-v3", cl::Hidden,
+ cl::desc("Enable Hexagon V3 instructions."));
+
+static cl::opt<bool>
+EnableMemOps(
+ "enable-hexagon-memops",
+ cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed,
+ cl::desc("Generate V4 MEMOP in code generation for Hexagon target"));
+
+HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS):
+ HexagonGenSubtargetInfo(TT, CPU, FS),
+ HexagonArchVersion(V1),
+ CPUString(CPU.str()) {
+ ParseSubtargetFeatures(CPU, FS);
+
+ switch(HexagonArchVersion) {
+ case HexagonSubtarget::V2:
+ break;
+ case HexagonSubtarget::V3:
+ EnableV3 = true;
+ break;
+ case HexagonSubtarget::V4:
+ break;
+ default:
+ llvm_unreachable("Unknown Architecture Version.");
+ }
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUString);
+
+  UseMemOps = EnableMemOps;
+}
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
new file mode 100644
index 0000000..6de85df
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -0,0 +1,74 @@
+//==-- HexagonSubtarget.h - Define Subtarget for the Hexagon ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Hexagon specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef Hexagon_SUBTARGET_H
+#define Hexagon_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "HexagonGenSubtargetInfo.inc"
+
+#define Hexagon_SMALL_DATA_THRESHOLD 8
+
+namespace llvm {
+
+class HexagonSubtarget : public HexagonGenSubtargetInfo {
+
+ bool UseMemOps;
+
+public:
+ enum HexagonArchEnum {
+ V1, V2, V3, V4
+ };
+
+ HexagonArchEnum HexagonArchVersion;
+ std::string CPUString;
+ InstrItineraryData InstrItins;
+
+public:
+ HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS);
+
+  /// getInstrItineraryData - Return the instruction itineraries based on
+  /// subtarget selection.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool hasV2TOps () const { return HexagonArchVersion >= V2; }
+ bool hasV2TOpsOnly () const { return HexagonArchVersion == V2; }
+ bool hasV3TOps () const { return HexagonArchVersion >= V3; }
+ bool hasV3TOpsOnly () const { return HexagonArchVersion == V3; }
+ bool hasV4TOps () const { return HexagonArchVersion >= V4; }
+ bool useMemOps () const { return HexagonArchVersion >= V4 && UseMemOps; }
+
+ bool isSubtargetV2() const { return HexagonArchVersion == V2;}
+ const std::string &getCPUString () const { return CPUString; }
+
+ // Threshold for small data section
+ unsigned getSmallDataThreshold() const {
+ return Hexagon_SMALL_DATA_THRESHOLD;
+ }
+ const HexagonArchEnum &getHexagonArchVersion() const {
+ return HexagonArchVersion;
+ }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
new file mode 100644
index 0000000..b29e92c
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -0,0 +1,118 @@
+//===-- HexagonTargetMachine.cpp - Define TargetMachine for Hexagon -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonTargetMachine.h"
+#include "Hexagon.h"
+#include "HexagonISelLowering.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/TargetRegistry.h"
+#include <iostream>
+
+using namespace llvm;
+
+static cl::opt<bool> DisableHardwareLoops(
+  "disable-hexagon-hwloops", cl::Hidden,
+  cl::desc("Disable Hardware Loops for Hexagon target"));
+
+/// HexagonTargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int HexagonTargetMachineModule;
+int HexagonTargetMachineModule = 0;
+
+extern "C" void LLVMInitializeHexagonTarget() {
+ // Register the target.
+ RegisterTargetMachine<HexagonTargetMachine> X(TheHexagonTarget);
+}
+
+
+/// HexagonTargetMachine ctor - Create an ILP32 architecture model.
+///
+/// Hexagon_TODO: Do I need an aggregate alignment?
+///
+HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ TargetOptions Options,
+ Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ DataLayout("e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-a0:0") ,
+ Subtarget(TT, CPU, FS), TLInfo(*this), InstrInfo(Subtarget),
+ TSInfo(*this),
+ FrameLowering(Subtarget),
+ InstrItins(&Subtarget.getInstrItineraryData()) {
+ setMCUseCFI(false);
+}
+
+// addPassesForOptimizations - Allow the backend (target) to add Target
+// Independent Optimization passes to the Pass Manager.
+bool HexagonTargetMachine::addPassesForOptimizations(PassManagerBase &PM) {
+
+ PM.add(createConstantPropagationPass());
+ PM.add(createLoopSimplifyPass());
+ PM.add(createDeadCodeEliminationPass());
+ PM.add(createConstantPropagationPass());
+ PM.add(createLoopUnrollPass());
+ PM.add(createLoopStrengthReducePass(getTargetLowering()));
+ return true;
+}
+
+bool HexagonTargetMachine::addInstSelector(PassManagerBase &PM) {
+ PM.add(createHexagonRemoveExtendOps(*this));
+ PM.add(createHexagonISelDag(*this));
+ return false;
+}
+
+
+bool HexagonTargetMachine::addPreRegAlloc(PassManagerBase &PM) {
+ if (!DisableHardwareLoops) {
+ PM.add(createHexagonHardwareLoops());
+ }
+
+ return false;
+}
+
+bool HexagonTargetMachine::addPostRegAlloc(PassManagerBase &PM) {
+ PM.add(createHexagonCFGOptimizer(*this));
+ return true;
+}
+
+
+bool HexagonTargetMachine::addPreSched2(PassManagerBase &PM) {
+ PM.add(createIfConverterPass());
+ return true;
+}
+
+bool HexagonTargetMachine::addPreEmitPass(PassManagerBase &PM) {
+
+ if (!DisableHardwareLoops) {
+ PM.add(createHexagonFixupHwLoops());
+ }
+
+ // Expand Spill code for predicate registers.
+ PM.add(createHexagonExpandPredSpillCode(*this));
+
+ // Split up TFRcondsets into conditional transfers.
+ PM.add(createHexagonSplitTFRCondSets(*this));
+
+ return false;
+}
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
new file mode 100644
index 0000000..e27d3ae
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -0,0 +1,86 @@
+//=-- HexagonTargetMachine.h - Define TargetMachine for Hexagon ---*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Hexagon specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonTARGETMACHINE_H
+#define HexagonTARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonISelLowering.h"
+#include "HexagonSelectionDAGInfo.h"
+#include "HexagonFrameLowering.h"
+
+namespace llvm {
+
+class Module;
+
+class HexagonTargetMachine : public LLVMTargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment.
+ HexagonSubtarget Subtarget;
+ HexagonTargetLowering TLInfo;
+ HexagonInstrInfo InstrInfo;
+ HexagonSelectionDAGInfo TSInfo;
+ HexagonFrameLowering FrameLowering;
+ const InstrItineraryData* InstrItins;
+
+public:
+ HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU,
+ StringRef FS, TargetOptions Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL);
+
+ virtual const HexagonInstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+ virtual const HexagonSubtarget *getSubtargetImpl() const {
+ return &Subtarget;
+ }
+ virtual const HexagonRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const InstrItineraryData* getInstrItineraryData() const {
+ return InstrItins;
+ }
+
+
+ virtual const HexagonTargetLowering* getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const HexagonFrameLowering* getFrameLowering() const {
+ return &FrameLowering;
+ }
+
+ virtual const HexagonSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ static unsigned getModuleMatchQuality(const Module &M);
+
+ // Pass Pipeline Configuration.
+ virtual bool addPassesForOptimizations(PassManagerBase &PM);
+ virtual bool addInstSelector(PassManagerBase &PM);
+ virtual bool addPreEmitPass(PassManagerBase &PM);
+ virtual bool addPreRegAlloc(llvm::PassManagerBase &PM);
+ virtual bool addPostRegAlloc(PassManagerBase &PM);
+ virtual bool addPreSched2(PassManagerBase &PM);
+};
+
+extern bool flag_aligned_memcpy;
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
new file mode 100644
index 0000000..188337d
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -0,0 +1,94 @@
+//===-- HexagonTargetObjectFile.cpp - Hexagon Object File Info --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the HexagonTargetObjectFile class, which handles the
+// placement of globals into the small data and small bss sections.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/CommandLine.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetObjectFile.h"
+#include "HexagonTargetMachine.h"
+
+using namespace llvm;
+
+static cl::opt<int> SmallDataThreshold("hexagon-small-data-threshold",
+ cl::init(8), cl::Hidden);
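+// For example, with the default threshold of 8, a 4-byte integer global is a
+// candidate for .sdata/.sbss, while a 16-byte aggregate falls through to the
+// normal section selection.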
+
+void HexagonTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
+
+ SmallDataSection =
+ getContext().getELFSection(".sdata", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ SmallBSSSection =
+ getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getBSS());
+}
+
+// sdata/sbss support taken largely from the MIPS Backend.
+static bool IsInSmallSection(uint64_t Size) {
+ return Size > 0 && Size <= (uint64_t)SmallDataThreshold;
+}
+/// IsGlobalInSmallSection - Return true if this global value should be
+/// placed into small data/bss section.
+bool HexagonTargetObjectFile::IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const {
+ // If the primary definition of this global value is outside the current
+ // translation unit or the global value is available for inspection but not
+ // emission, then do nothing.
+ if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
+ return false;
+
+  // Otherwise, check whether GV should go in sdata/sbss based on the section
+  // kind it would normally get from getKindForGlobal(GV, TM).
+ return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM));
+}
+
+/// IsGlobalInSmallSection - Return true if this global value should be
+/// placed into small data/bss section.
+bool HexagonTargetObjectFile::
+IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
+ SectionKind Kind) const {
+ // Only global variables, not functions.
+ const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
+ if (!GVA)
+ return false;
+
+ if (Kind.isBSS() || Kind.isDataNoRel() || Kind.isCommon()) {
+ Type *Ty = GV->getType()->getElementType();
+ return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty));
+ }
+
+ return false;
+}
+
+const MCSection *HexagonTargetObjectFile::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+
+ // Handle Small Section classification here.
+ if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallBSSSection;
+ if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallDataSection;
+
+ // Otherwise, we work the same as ELF.
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM);
+}
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.h b/lib/Target/Hexagon/HexagonTargetObjectFile.h
new file mode 100644
index 0000000..101c1f2
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.h
@@ -0,0 +1,40 @@
+//===-- HexagonTargetObjectFile.h - Hexagon Object File Info ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonTARGETOBJECTFILE_H
+#define HexagonTARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCSectionELF.h"
+
+namespace llvm {
+
+ class HexagonTargetObjectFile : public TargetLoweringObjectFileELF {
+ const MCSectionELF *SmallDataSection;
+ const MCSectionELF *SmallBSSSection;
+ public:
+ virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+ /// IsGlobalInSmallSection - Return true if this global address should be
+ /// placed into small data/bss section.
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM,
+ SectionKind Kind) const;
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const;
+
+ const MCSection* SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind,
+ Mangler *Mang,
+ const TargetMachine &TM) const;
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonVarargsCallingConvention.h b/lib/Target/Hexagon/HexagonVarargsCallingConvention.h
new file mode 100644
index 0000000..21b2d67
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonVarargsCallingConvention.h
@@ -0,0 +1,141 @@
+//==-- HexagonVarargsCallingConvention.h - Calling Conventions ---*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the functions that assign locations to outgoing function
+// arguments. Adapted from the target-independent version, but this one also
+// handles calls to varargs functions.
+//
+//===----------------------------------------------------------------------===//
+//
+
+static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags,
+ Hexagon_CCState &State,
+ int NonVarArgsParams,
+ int CurrentParam,
+ bool ForceMem);
+
+
+static bool CC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags,
+ Hexagon_CCState &State,
+ int NonVarArgsParams,
+ int CurrentParam,
+ bool ForceMem) {
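+  // ByVal aggregates larger than 64 bits are always forced to memory.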
+ unsigned ByValSize = 0;
+ if (ArgFlags.isByVal() &&
+ ((ByValSize = ArgFlags.getByValSize()) >
+ (MVT(MVT::i64).getSizeInBits() / 8))) {
+ ForceMem = true;
+ }
+
+
+ // Only assign registers for named (non varargs) arguments
+ if ( !ForceMem && ((NonVarArgsParams == -1) || (CurrentParam <=
+ NonVarArgsParams))) {
+
+ if (LocVT == MVT::i32 ||
+ LocVT == MVT::i16 ||
+ LocVT == MVT::i8 ||
+ LocVT == MVT::f32) {
+ static const unsigned RegList1[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+ Hexagon::R5
+ };
+ if (unsigned Reg = State.AllocateReg(RegList1, 6)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+ }
+ }
+
+ if (LocVT == MVT::i64 ||
+ LocVT == MVT::f64) {
+ static const unsigned RegList2[] = {
+ Hexagon::D0, Hexagon::D1, Hexagon::D2
+ };
+ if (unsigned Reg = State.AllocateReg(RegList2, 3)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+ }
+ }
+ }
+
+ const Type* ArgTy = LocVT.getTypeForEVT(State.getContext());
+ unsigned Alignment =
+ State.getTarget().getTargetData()->getABITypeAlignment(ArgTy);
+ unsigned Size =
+ State.getTarget().getTargetData()->getTypeSizeInBits(ArgTy) / 8;
+
+ // If it's passed by value, then we need the size of the aggregate not of
+ // the pointer.
+ if (ArgFlags.isByVal()) {
+ Size = ByValSize;
+
+ // Hexagon_TODO: Get the alignment of the contained type here.
+ Alignment = 8;
+ }
+
+ unsigned Offset3 = State.AllocateStack(Size, Alignment);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+}
+
+
+static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags,
+ Hexagon_CCState &State,
+ int NonVarArgsParams,
+ int CurrentParam,
+ bool ForceMem) {
+
+ if (LocVT == MVT::i32 ||
+ LocVT == MVT::f32) {
+ static const unsigned RegList1[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+ Hexagon::R5
+ };
+ if (unsigned Reg = State.AllocateReg(RegList1, 6)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+ }
+ }
+
+ if (LocVT == MVT::i64 ||
+ LocVT == MVT::f64) {
+ static const unsigned RegList2[] = {
+ Hexagon::D0, Hexagon::D1, Hexagon::D2
+ };
+ if (unsigned Reg = State.AllocateReg(RegList2, 3)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+ }
+ }
+
+ const Type* ArgTy = LocVT.getTypeForEVT(State.getContext());
+ unsigned Alignment =
+ State.getTarget().getTargetData()->getABITypeAlignment(ArgTy);
+ unsigned Size =
+ State.getTarget().getTargetData()->getTypeSizeInBits(ArgTy) / 8;
+
+ unsigned Offset3 = State.AllocateStack(Size, Alignment);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+}
diff --git a/lib/Target/Hexagon/LLVMBuild.txt b/lib/Target/Hexagon/LLVMBuild.txt
new file mode 100644
index 0000000..84ea6a0
--- /dev/null
+++ b/lib/Target/Hexagon/LLVMBuild.txt
@@ -0,0 +1,32 @@
+;===- ./lib/Target/Hexagon/LLVMBuild.txt -----------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = TargetInfo MCTargetDesc
+
+[component_0]
+type = TargetGroup
+name = Hexagon
+parent = Target
+has_asmprinter = 1
+
+[component_1]
+type = Library
+name = HexagonCodeGen
+parent = Hexagon
+required_libraries = AsmPrinter CodeGen Core HexagonInfo SelectionDAG Support Target MC HexagonDesc
+add_to_library_groups = Hexagon
diff --git a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 0000000..8e3da99
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_llvm_library(LLVMHexagonDesc
+ HexagonMCTargetDesc.cpp
+ HexagonMCAsmInfo.cpp
+ )
+
+add_dependencies(LLVMHexagonDesc HexagonCommonTableGen)
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
new file mode 100644
index 0000000..188693c
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
@@ -0,0 +1,36 @@
+//===-- HexagonMCAsmInfo.cpp - Hexagon asm properties -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the HexagonMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCAsmInfo.h"
+
+using namespace llvm;
+
+HexagonMCAsmInfo::HexagonMCAsmInfo(const Target &T, StringRef TT) {
+ Data16bitsDirective = "\t.half\t";
+ Data32bitsDirective = "\t.word\t";
+  Data64bitsDirective = 0; // There is no 64-bit data directive.
+ CommentString = "//";
+ HasLEB128 = true;
+
+ PrivateGlobalPrefix = ".L";
+ LCOMMDirectiveType = LCOMM::ByteAlignment;
+ InlineAsmStart = "# InlineAsm Start";
+ InlineAsmEnd = "# InlineAsm End";
+ ZeroDirective = "\t.space\t";
+ AscizDirective = "\t.string\t";
+ WeakRefDirective = "\t.weak\t";
+
+ UsesELFSectionDirectiveForBSS = true;
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
new file mode 100644
index 0000000..8196e95
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
@@ -0,0 +1,30 @@
+//===-- HexagonMCAsmInfo.h - Hexagon asm properties -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the HexagonMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonMCASMINFO_H
+#define HexagonMCASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ class HexagonMCAsmInfo : public MCAsmInfo {
+ public:
+ explicit HexagonMCAsmInfo(const Target &T, StringRef TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
new file mode 100644
index 0000000..625f07c
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -0,0 +1,94 @@
+//===-- HexagonMCTargetDesc.cpp - Hexagon Target Descriptions ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Hexagon specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCTargetDesc.h"
+#include "HexagonMCAsmInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "HexagonGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "HexagonGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "HexagonGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createHexagonMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitHexagonMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitHexagonMCRegisterInfo(X, Hexagon::R0);
+ return X;
+}
+
+static MCSubtargetInfo *createHexagonMCSubtargetInfo(StringRef TT,
+ StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitHexagonMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCAsmInfo *createHexagonMCAsmInfo(const Target &T, StringRef TT) {
+ MCAsmInfo *MAI = new HexagonMCAsmInfo(T, TT);
+
+ // VirtualFP = (R30 + #0).
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(Hexagon::R30, 0);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createHexagonMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ // For the time being, use static relocations, since there's really no
+ // support for PIC yet.
+ X->InitMCCodeGenInfo(Reloc::Static, CM, OL);
+ return X;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeHexagonTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(TheHexagonTarget, createHexagonMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheHexagonTarget,
+ createHexagonMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheHexagonTarget, createHexagonMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheHexagonTarget,
+ createHexagonMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheHexagonTarget,
+ createHexagonMCSubtargetInfo);
+}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
new file mode 100644
index 0000000..364841f
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -0,0 +1,40 @@
+//===-- HexagonMCTargetDesc.h - Hexagon Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Hexagon specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMCTARGETDESC_H
+#define HEXAGONMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheHexagonTarget;
+
+} // End llvm namespace
+
+// Define symbolic names for Hexagon registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "HexagonGenRegisterInfo.inc"
+
+// Defines symbolic names for the Hexagon instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "HexagonGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "HexagonGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 0000000..1114d99
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = HexagonDesc
+parent = Hexagon
+required_libraries = HexagonInfo MC
+add_to_library_groups = Hexagon
diff --git a/lib/Target/Hexagon/MCTargetDesc/Makefile b/lib/Target/Hexagon/MCTargetDesc/Makefile
new file mode 100644
index 0000000..67be2bc
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/Hexagon/MCTargetDesc/Makefile ------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMHexagonDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Hexagon/Makefile b/lib/Target/Hexagon/Makefile
new file mode 100644
index 0000000..c936e92
--- /dev/null
+++ b/lib/Target/Hexagon/Makefile
@@ -0,0 +1,23 @@
+##===- lib/Target/Hexagon/Makefile -------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMHexagonCodeGen
+TARGET = Hexagon
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = HexagonGenRegisterInfo.inc \
+ HexagonGenInstrInfo.inc \
+ HexagonGenAsmWriter.inc \
+ HexagonGenDAGISel.inc HexagonGenSubtargetInfo.inc \
+ HexagonGenCallingConv.inc \
+ HexagonAsmPrinter.cpp
+
+DIRS = TargetInfo MCTargetDesc
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Hexagon/TargetInfo/CMakeLists.txt b/lib/Target/Hexagon/TargetInfo/CMakeLists.txt
new file mode 100644
index 0000000..5b04a30
--- /dev/null
+++ b/lib/Target/Hexagon/TargetInfo/CMakeLists.txt
@@ -0,0 +1,8 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMHexagonInfo
+ HexagonTargetInfo.cpp
+ )
+
+add_dependencies(LLVMHexagonInfo HexagonCommonTableGen)
diff --git a/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
new file mode 100644
index 0000000..7aa5dd3
--- /dev/null
+++ b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- HexagonTargetInfo.cpp - Hexagon Target Implementation ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "llvm/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheHexagonTarget;
+
+extern "C" void LLVMInitializeHexagonTargetInfo() {
+  RegisterTarget<Triple::hexagon, /*HasJIT=*/false>
+    X(TheHexagonTarget, "hexagon", "Hexagon");
+}
diff --git a/lib/Target/Hexagon/TargetInfo/LLVMBuild.txt b/lib/Target/Hexagon/TargetInfo/LLVMBuild.txt
new file mode 100644
index 0000000..7b87be3
--- /dev/null
+++ b/lib/Target/Hexagon/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Hexagon/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = HexagonInfo
+parent = Hexagon
+required_libraries = MC Support
+add_to_library_groups = Hexagon
diff --git a/lib/Target/Hexagon/TargetInfo/Makefile b/lib/Target/Hexagon/TargetInfo/Makefile
new file mode 100644
index 0000000..494cca1
--- /dev/null
+++ b/lib/Target/Hexagon/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/Hexagon/TargetInfo/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMHexagonInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt
index 358cbc8..5a42ca5 100644
--- a/lib/Target/LLVMBuild.txt
+++ b/lib/Target/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = ARM CBackend CellSPU CppBackend Hexagon MBlaze MSP430 Mips PTX PowerPC Sparc X86 XCore
+
; This is a special group whose required libraries are extended (by llvm-build)
; with the best execution engine (the native JIT, if available, or the
; interpreter).
diff --git a/lib/Target/MBlaze/AsmParser/CMakeLists.txt b/lib/Target/MBlaze/AsmParser/CMakeLists.txt
index ec8f52a..813767b 100644
--- a/lib/Target/MBlaze/AsmParser/CMakeLists.txt
+++ b/lib/Target/MBlaze/AsmParser/CMakeLists.txt
@@ -6,11 +6,4 @@ add_llvm_library(LLVMMBlazeAsmParser
MBlazeAsmParser.cpp
)
-add_llvm_library_dependencies(LLVMMBlazeAsmParser
- LLVMMBlazeInfo
- LLVMMC
- LLVMMCParser
- LLVMSupport
- )
-
add_dependencies(LLVMMBlazeAsmParser MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/AsmParser/LLVMBuild.txt b/lib/Target/MBlaze/AsmParser/LLVMBuild.txt
index 2c61a7f..b10189a 100644
--- a/lib/Target/MBlaze/AsmParser/LLVMBuild.txt
+++ b/lib/Target/MBlaze/AsmParser/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MBlazeAsmParser
parent = MBlaze
required_libraries = MBlazeInfo MC MCParser Support
add_to_library_groups = MBlaze
-
diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt
index d3f1383..71095e5 100644
--- a/lib/Target/MBlaze/CMakeLists.txt
+++ b/lib/Target/MBlaze/CMakeLists.txt
@@ -29,19 +29,6 @@ add_llvm_target(MBlazeCodeGen
MBlazeELFWriterInfo.cpp
)
-add_llvm_library_dependencies(LLVMMBlazeCodeGen
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMBlazeAsmPrinter
- LLVMMBlazeDesc
- LLVMMBlazeInfo
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(AsmParser)
add_subdirectory(Disassembler)
add_subdirectory(InstPrinter)
diff --git a/lib/Target/MBlaze/Disassembler/CMakeLists.txt b/lib/Target/MBlaze/Disassembler/CMakeLists.txt
index e0a53ee..be2dce1 100644
--- a/lib/Target/MBlaze/Disassembler/CMakeLists.txt
+++ b/lib/Target/MBlaze/Disassembler/CMakeLists.txt
@@ -13,11 +13,4 @@ set_property(
)
endif()
-add_llvm_library_dependencies(LLVMMBlazeDisassembler
- LLVMMBlazeDesc
- LLVMMBlazeInfo
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMMBlazeDisassembler MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/Disassembler/LLVMBuild.txt b/lib/Target/MBlaze/Disassembler/LLVMBuild.txt
index c5c4f80..28dd9dc 100644
--- a/lib/Target/MBlaze/Disassembler/LLVMBuild.txt
+++ b/lib/Target/MBlaze/Disassembler/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MBlazeDisassembler
parent = MBlaze
required_libraries = MBlazeDesc MBlazeInfo MC Support
add_to_library_groups = MBlaze
-
diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
index 3087317..ccc3a05 100644
--- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
+++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
@@ -123,6 +123,7 @@ static unsigned decodeSEXT(uint32_t insn) {
case 0x41: return MBlaze::SRL;
case 0x21: return MBlaze::SRC;
case 0x01: return MBlaze::SRA;
+ case 0xE0: return MBlaze::CLZ;
}
}
@@ -176,6 +177,13 @@ static unsigned decodeBR(uint32_t insn) {
}
static unsigned decodeBRI(uint32_t insn) {
+ switch (insn&0x3FFFFFF) {
+ default: break;
+ case 0x0020004: return MBlaze::IDMEMBAR;
+ case 0x0220004: return MBlaze::DMEMBAR;
+ case 0x0420004: return MBlaze::IMEMBAR;
+ }
+
switch ((insn>>16)&0x1F) {
default: return UNSUPPORTED;
case 0x00: return MBlaze::BRI;
@@ -531,6 +539,9 @@ MCDisassembler::DecodeStatus MBlazeDisassembler::getInstruction(MCInst &instr,
default:
return Fail;
+ case MBlazeII::FC:
+ break;
+
case MBlazeII::FRRRR:
if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED)
return Fail;
@@ -547,6 +558,13 @@ MCDisassembler::DecodeStatus MBlazeDisassembler::getInstruction(MCInst &instr,
instr.addOperand(MCOperand::CreateReg(RB));
break;
+ case MBlazeII::FRR:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ break;
+
case MBlazeII::FRI:
switch (opcode) {
default:
diff --git a/lib/Target/MBlaze/InstPrinter/CMakeLists.txt b/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
index aff0b3d..586e2d3 100644
--- a/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
+++ b/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
@@ -5,9 +5,4 @@ add_llvm_library(LLVMMBlazeAsmPrinter
MBlazeInstPrinter.cpp
)
-add_llvm_library_dependencies(LLVMMBlazeAsmPrinter
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMMBlazeAsmPrinter MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt b/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt
index 7a21f1e..3a21a05 100644
--- a/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MBlazeAsmPrinter
parent = MBlaze
required_libraries = MC Support
add_to_library_groups = MBlaze
-
diff --git a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
index 570ab08..5297563 100644
--- a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
+++ b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
@@ -1,4 +1,4 @@
-//===-- MBLazeInstPrinter.h - Convert MBlaze MCInst to assembly syntax ----===//
+//===-- MBlazeInstPrinter.h - Convert MBlaze MCInst to assembly syntax ----===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/LLVMBuild.txt b/lib/Target/MBlaze/LLVMBuild.txt
index f1a3f5d..0b29007 100644
--- a/lib/Target/MBlaze/LLVMBuild.txt
+++ b/lib/Target/MBlaze/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = MBlaze
@@ -29,4 +32,3 @@ name = MBlazeCodeGen
parent = MBlaze
required_libraries = AsmPrinter CodeGen Core MBlazeAsmPrinter MBlazeDesc MBlazeInfo MC SelectionDAG Support Target
add_to_library_groups = MBlaze
-
diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
index ff051e3..c751dd8 100644
--- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
@@ -310,9 +310,9 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
// Check if the last terminator is an unconditional branch.
MachineBasicBlock::const_iterator I = Pred->end();
- while (I != Pred->begin() && !(--I)->getDesc().isTerminator())
+ while (I != Pred->begin() && !(--I)->isTerminator())
; // Noop
- return I == Pred->end() || !I->getDesc().isBarrier();
+ return I == Pred->end() || !I->isBarrier();
}
// Force static initialization.
diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
index c07570a..19e787d 100644
--- a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
+++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
@@ -29,13 +29,11 @@ using namespace llvm;
STATISTIC(FilledSlots, "Number of delay slots filled");
-namespace llvm {
-cl::opt<bool> DisableDelaySlotFiller(
+static cl::opt<bool> MBDisableDelaySlotFiller(
"disable-mblaze-delay-filler",
cl::init(false),
cl::desc("Disable the MBlaze delay slot filter."),
cl::Hidden);
-}
namespace {
struct Filler : public MachineFunctionPass {
@@ -109,7 +107,6 @@ static bool delayHasHazard(MachineBasicBlock::iterator &candidate,
// Hazard check
MachineBasicBlock::iterator a = candidate;
MachineBasicBlock::iterator b = slot;
- MCInstrDesc desc = candidate->getDesc();
// MBB layout:-
// candidate := a0 = operation(a1, a2)
@@ -123,7 +120,7 @@ static bool delayHasHazard(MachineBasicBlock::iterator &candidate,
// 4. b0 is one or more of {a1, a2}
// 5. a accesses memory, and the middle bit
// contains a store operation.
- bool a_is_memory = desc.mayLoad() || desc.mayStore();
+ bool a_is_memory = candidate->mayLoad() || candidate->mayStore();
// Determine the number of operands in the slot instruction and in the
// candidate instruction.
@@ -156,7 +153,7 @@ static bool delayHasHazard(MachineBasicBlock::iterator &candidate,
}
// Check hazard type 5
- if (a_is_memory && m->getDesc().mayStore())
+ if (a_is_memory && m->mayStore())
return true;
}
@@ -183,8 +180,8 @@ static bool isDelayFiller(MachineBasicBlock &MBB,
if (candidate == MBB.begin())
return false;
- MCInstrDesc brdesc = (--candidate)->getDesc();
- return (brdesc.hasDelaySlot());
+ --candidate;
+ return (candidate->hasDelaySlot());
}
static bool hasUnknownSideEffects(MachineBasicBlock::iterator &I) {
@@ -211,9 +208,8 @@ findDelayInstr(MachineBasicBlock &MBB,MachineBasicBlock::iterator slot) {
break;
--I;
- MCInstrDesc desc = I->getDesc();
- if (desc.hasDelaySlot() || desc.isBranch() || isDelayFiller(MBB,I) ||
- desc.isCall() || desc.isReturn() || desc.isBarrier() ||
+ if (I->hasDelaySlot() || I->isBranch() || isDelayFiller(MBB,I) ||
+ I->isCall() || I->isReturn() || I->isBarrier() ||
hasUnknownSideEffects(I))
break;
@@ -232,11 +228,11 @@ findDelayInstr(MachineBasicBlock &MBB,MachineBasicBlock::iterator slot) {
bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
- if (I->getDesc().hasDelaySlot()) {
+ if (I->hasDelaySlot()) {
MachineBasicBlock::iterator D = MBB.end();
MachineBasicBlock::iterator J = I;
- if (!DisableDelaySlotFiller)
+ if (!MBDisableDelaySlotFiller)
D = findDelayInstr(MBB,I);
++FilledSlots;
diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/lib/Target/MBlaze/MBlazeFrameLowering.cpp
index f28d5a7..37919bc 100644
--- a/lib/Target/MBlaze/MBlazeFrameLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeFrameLowering.cpp
@@ -32,13 +32,11 @@
using namespace llvm;
-namespace llvm {
- cl::opt<bool> DisableStackAdjust(
- "disable-mblaze-stack-adjust",
- cl::init(false),
- cl::desc("Disable MBlaze stack layout adjustment."),
- cl::Hidden);
-}
+static cl::opt<bool> MBDisableStackAdjust(
+ "disable-mblaze-stack-adjust",
+ cl::init(false),
+ cl::desc("Disable MBlaze stack layout adjustment."),
+ cl::Hidden);
static void replaceFrameIndexes(MachineFunction &MF,
SmallVector<std::pair<int,int64_t>, 16> &FR) {
@@ -85,7 +83,7 @@ static void replaceFrameIndexes(MachineFunction &MF,
//===----------------------------------------------------------------------===//
static void analyzeFrameIndexes(MachineFunction &MF) {
- if (DisableStackAdjust) return;
+ if (MBDisableStackAdjust) return;
MachineFrameInfo *MFI = MF.getFrameInfo();
MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
@@ -336,7 +334,8 @@ int MBlazeFrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI)
// if frame pointer elimination is disabled.
bool MBlazeFrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects();
}
void MBlazeFrameLowering::emitPrologue(MachineFunction &MF) const {
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index 148d906..0002174 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -167,7 +167,9 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
diff --git a/lib/Target/MBlaze/MBlazeInstrFormats.td b/lib/Target/MBlaze/MBlazeInstrFormats.td
index 54f605f..4c6034d 100644
--- a/lib/Target/MBlaze/MBlazeInstrFormats.td
+++ b/lib/Target/MBlaze/MBlazeInstrFormats.td
@@ -35,6 +35,7 @@ def FRIR : Format<17>; // RSUBI
def FRRRR : Format<18>; // RSUB, FRSUB
def FRI : Format<19>; // RSUB, FRSUB
def FC : Format<20>; // NOP
+def FRR : Format<21>; // CLZ
//===----------------------------------------------------------------------===//
// Describe MBlaze instructions format
@@ -202,3 +203,26 @@ class MSR<bits<6> op, bits<6> flags, dag outs, dag ins, string asmstr,
let Inst{11-16} = flags;
let Inst{17-31} = imm15;
}
+
+//===----------------------------------------------------------------------===//
+// TCLZ instruction class in MBlaze : <|opcode|rd|ra|flags|>
+//===----------------------------------------------------------------------===//
+class TCLZ<bits<6> op, bits<16> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op, FRR, outs, ins, asmstr, pattern, itin> {
+ bits<5> rd;
+ bits<5> ra;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = ra;
+ let Inst{16-31} = flags;
+}
+
+//===----------------------------------------------------------------------===//
+// MBAR instruction class in MBlaze : <|opcode|flags|>
+//===----------------------------------------------------------------------===//
+class MBAR<bits<6> op, bits<26> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op, FC, outs, ins, asmstr, pattern, itin> {
+ let Inst{6-31} = flags;
+}
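
The TCLZ and MBAR classes above only describe field layout: TCLZ carries rd, ra and a 16-bit flags field, MBAR a single 26-bit flags field. A rough standalone C++ sketch of how those fields would pack into a 32-bit word, assuming (purely for illustration) the usual MicroBlaze convention that Inst{0} is the most significant bit and the opcode occupies bits 0-5; the helper names are invented here:

#include <cassert>
#include <cstdint>

// |opcode(6)|rd(5)|ra(5)|flags(16)| : TCLZ-style layout, MSB-first.
static uint32_t encodeTCLZ(uint32_t op, uint32_t rd, uint32_t ra,
                           uint32_t flags) {
  return (op << 26) | ((rd & 0x1F) << 21) | ((ra & 0x1F) << 16) |
         (flags & 0xFFFF);
}

// |opcode(6)|flags(26)| : MBAR-style layout.
static uint32_t encodeMBAR(uint32_t op, uint32_t flags) {
  return (op << 26) | (flags & 0x3FFFFFF);
}

int main() {
  // clz uses opcode 0x24 with flags 0x00E0 (see the CLZ definition further down).
  assert((encodeTCLZ(0x24, 3, 4, 0x00E0) & 0xFFFF) == 0x00E0);
  // mbar 2 (IMEMBAR) uses opcode 0x2E with flags 0x0420004, the same value
  // the new decodeBRI case matches with insn & 0x3FFFFFF.
  assert((encodeMBAR(0x2E, 0x0420004) & 0x3FFFFFF) == 0x0420004);
  return 0;
}
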
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td
index 1d8c987..9fe2a49 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.td
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.td
@@ -594,9 +594,18 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
//===----------------------------------------------------------------------===//
let neverHasSideEffects = 1 in {
- def NOP : MBlazeInst< 0x20, FC, (outs), (ins), "nop ", [], IIC_ALU>;
+ def NOP : MBlazeInst<0x20, FC, (outs), (ins), "nop ", [], IIC_ALU>;
}
+let Predicates=[HasPatCmp] in {
+ def CLZ : TCLZ<0x24, 0x00E0, (outs GPR:$dst), (ins GPR:$src),
+ "clz $dst, $src", [], IIC_ALU>;
+}
+
+def IMEMBAR : MBAR<0x2E, 0x0420004, (outs), (ins), "mbar 2", [], IIC_ALU>;
+def DMEMBAR : MBAR<0x2E, 0x0220004, (outs), (ins), "mbar 1", [], IIC_ALU>;
+def IDMEMBAR : MBAR<0x2E, 0x0020004, (outs), (ins), "mbar 0", [], IIC_ALU>;
+
let usesCustomInserter = 1 in {
def Select_CC : MBlazePseudo<(outs GPR:$dst),
(ins GPR:$T, GPR:$F, GPR:$CMP, i32imm:$CC), // F T reversed
@@ -751,6 +760,56 @@ def : Pat<(sra GPR:$L, GPR:$R), (ShiftRA GPR:$L, GPR:$R)>;
def : Pat<(srl GPR:$L, GPR:$R), (ShiftRL GPR:$L, GPR:$R)>;
// SET_CC operations
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 1)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETNE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 2)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 3)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETLT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 4)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 5)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETLE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 6)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETUGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU (i32 R0), GPR:$L), 3)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETULT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU (i32 R0), GPR:$L), 4)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETUGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU (i32 R0), GPR:$L), 5)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETULE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU (i32 R0), GPR:$L), 6)>;
+
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 1)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETNE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 2)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 3)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETLT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 4)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 5)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETLE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 6)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETUGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, (i32 R0)), 3)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETULT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, (i32 R0)), 4)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETUGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, (i32 R0)), 5)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETULE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, (i32 R0)), 6)>;
+
def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ),
(Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
(CMP GPR:$R, GPR:$L), 1)>;
@@ -787,6 +846,68 @@ def : Pat<(select (i32 GPR:$C), (i32 GPR:$T), (i32 GPR:$F)),
(Select_CC GPR:$T, GPR:$F, GPR:$C, 2)>;
// SELECT_CC
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETEQ),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 1)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETNE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 2)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETGT),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 3)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETLT),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 4)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETGE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 5)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETLE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 6)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 3)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETULT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 4)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 5)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETULE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 6)>;
+
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETEQ),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 1)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETNE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 2)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETGT),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 3)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETLT),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 4)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETGE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 5)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETLE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 6)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 3)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETULT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 4)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 5)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETULE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 6)>;
+
def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
(i32 GPR:$T), (i32 GPR:$F), SETEQ),
(Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 1)>;
@@ -827,6 +948,48 @@ def : Pat<(br bb:$T), (BRID bb:$T)>;
def : Pat<(brind GPR:$T), (BRAD GPR:$T)>;
// BRCOND instructions
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETEQ), bb:$T),
+ (BEQID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETNE), bb:$T),
+ (BNEID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETGT), bb:$T),
+ (BGTID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETLT), bb:$T),
+ (BLTID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETGE), bb:$T),
+ (BGEID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETLE), bb:$T),
+ (BLEID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETUGT), bb:$T),
+ (BGTID (CMPU (i32 R0), GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETULT), bb:$T),
+ (BLTID (CMPU (i32 R0), GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETUGE), bb:$T),
+ (BGEID (CMPU (i32 R0), GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETULE), bb:$T),
+ (BLEID (CMPU (i32 R0), GPR:$L), bb:$T)>;
+
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETEQ), bb:$T),
+ (BEQID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETNE), bb:$T),
+ (BNEID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETGT), bb:$T),
+ (BGTID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETLT), bb:$T),
+ (BLTID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETGE), bb:$T),
+ (BGEID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETLE), bb:$T),
+ (BLEID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETUGT), bb:$T),
+ (BGTID (CMPU GPR:$R, (i32 R0)), bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETULT), bb:$T),
+ (BLTID (CMPU GPR:$R, (i32 R0)), bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETUGE), bb:$T),
+ (BGEID (CMPU GPR:$R, (i32 R0)), bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETULE), bb:$T),
+ (BLEID (CMPU GPR:$R, (i32 R0)), bb:$T)>;
+
def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ), bb:$T),
(BEQID (CMP GPR:$R, GPR:$L), bb:$T)>;
def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETNE), bb:$T),
@@ -869,11 +1032,11 @@ def : Pat<(store (i32 GPR:$dst), xaddr:$addr), (SW GPR:$dst, xaddr:$addr)>;
def : Pat<(load xaddr:$addr), (i32 (LW xaddr:$addr))>;
// 16-bit load and store
-def : Pat<(truncstorei16 (i32 GPR:$dst), xaddr:$addr), (SH GPR:$dst, xaddr:$addr)>;
+def : Pat<(truncstorei16 (i32 GPR:$dst), xaddr:$ad), (SH GPR:$dst, xaddr:$ad)>;
def : Pat<(zextloadi16 xaddr:$addr), (i32 (LHU xaddr:$addr))>;
// 8-bit load and store
-def : Pat<(truncstorei8 (i32 GPR:$dst), xaddr:$addr), (SB GPR:$dst, xaddr:$addr)>;
+def : Pat<(truncstorei8 (i32 GPR:$dst), xaddr:$ad), (SB GPR:$dst, xaddr:$ad)>;
def : Pat<(zextloadi8 xaddr:$addr), (i32 (LBU xaddr:$addr))>;
// Peepholes
diff --git a/lib/Target/MBlaze/MBlazeMCInstLower.cpp b/lib/Target/MBlaze/MBlazeMCInstLower.cpp
index a7e400b..7e5598f 100644
--- a/lib/Target/MBlaze/MBlazeMCInstLower.cpp
+++ b/lib/Target/MBlaze/MBlazeMCInstLower.cpp
@@ -1,4 +1,4 @@
-//===-- MBLazeMCInstLower.cpp - Convert MBlaze MachineInstr to an MCInst---===//
+//===-- MBlazeMCInstLower.cpp - Convert MBlaze MachineInstr to an MCInst---===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index 4ad7bd6..5ed81dd 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -33,16 +33,16 @@ extern "C" void LLVMInitializeMBlazeTarget() {
// an easier handling.
MBlazeTargetMachine::
MBlazeTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL):
- LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
- Subtarget(TT, CPU, FS),
- DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"),
- InstrInfo(*this),
- FrameLowering(Subtarget),
- TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()) {
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS),
+ DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"),
+ InstrInfo(*this),
+ FrameLowering(Subtarget),
+ TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this),
+ InstrItins(Subtarget.getInstrItineraryData()) {
}
// Install an instruction selector pass using
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h
index 1c1aa53..036f1b6 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.h
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -43,6 +43,7 @@ namespace llvm {
public:
MBlazeTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
diff --git a/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt b/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
index 37871b6..6fa7f43 100644
--- a/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
@@ -5,11 +5,4 @@ add_llvm_library(LLVMMBlazeDesc
MBlazeMCTargetDesc.cpp
)
-add_llvm_library_dependencies(LLVMMBlazeDesc
- LLVMMBlazeAsmPrinter
- LLVMMBlazeInfo
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMMBlazeDesc MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt b/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt
index e89811b..4982f0f 100644
--- a/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MBlazeDesc
parent = MBlaze
required_libraries = MBlazeAsmPrinter MBlazeInfo MC Support
add_to_library_groups = MBlaze
-
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
index 08f7d46..d5acbe9 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
@@ -58,6 +58,11 @@ public:
bool MayNeedRelaxation(const MCInst &Inst) const;
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const;
+
void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
@@ -87,6 +92,18 @@ bool MBlazeAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
return hasExprOrImm;
}
+bool MBlazeAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // FIXME: Is this right? It's what the "generic" code was doing before,
+ // but is X86 specific. Is it actually true for MBlaze also, or was it
+ // just close enough to not be a big deal?
+ //
+ // Relax if the value is too big for a (signed) i8.
+ return int64_t(Value) != int64_t(int8_t(Value));
+}
+
void MBlazeAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
Res = Inst;
Res.setOpcode(getRelaxedOpcode(Inst.getOpcode()));
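
The fixupNeedsRelaxation hook added above keeps the old generic behaviour the FIXME describes: relax whenever the fixup value no longer fits in a signed 8-bit immediate. A tiny standalone sketch of that range test (helper name invented here for illustration):

#include <cassert>
#include <cstdint>

// Same test as above: sign-extend the low byte and compare with the
// full value; if they differ, the value does not fit in a signed i8.
static bool needsRelaxation(uint64_t value) {
  return (int64_t)value != (int64_t)(int8_t)value;
}

int main() {
  assert(!needsRelaxation(127));               // fits
  assert(needsRelaxation(128));                // too big
  assert(!needsRelaxation((uint64_t)-128));    // fits
  assert(needsRelaxation((uint64_t)-129));     // too small
  return 0;
}
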
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h
index 776dbc4..c8bdd6f 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h
@@ -51,6 +51,7 @@ namespace MBlazeII {
FRRRR,
FRI,
FC,
+ FRR,
FormMask = 63
//===------------------------------------------------------------------===//
diff --git a/lib/Target/MBlaze/TargetInfo/CMakeLists.txt b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
index 93fce58..b554d9b 100644
--- a/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
+++ b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
@@ -5,10 +5,4 @@ add_llvm_library(LLVMMBlazeInfo
MBlazeTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMMBlazeInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMMBlazeInfo MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt b/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt
index 938a1d9..ba7ee5d 100644
--- a/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MBlazeInfo
parent = MBlaze
required_libraries = MC Support Target
add_to_library_groups = MBlaze
-
diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt
index 55c2d7d..7daa7a2 100644
--- a/lib/Target/MSP430/CMakeLists.txt
+++ b/lib/Target/MSP430/CMakeLists.txt
@@ -22,19 +22,6 @@ add_llvm_target(MSP430CodeGen
MSP430MCInstLower.cpp
)
-add_llvm_library_dependencies(LLVMMSP430CodeGen
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMMSP430AsmPrinter
- LLVMMSP430Desc
- LLVMMSP430Info
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/MSP430/InstPrinter/CMakeLists.txt b/lib/Target/MSP430/InstPrinter/CMakeLists.txt
index ce39d95..64ac994 100644
--- a/lib/Target/MSP430/InstPrinter/CMakeLists.txt
+++ b/lib/Target/MSP430/InstPrinter/CMakeLists.txt
@@ -4,9 +4,4 @@ add_llvm_library(LLVMMSP430AsmPrinter
MSP430InstPrinter.cpp
)
-add_llvm_library_dependencies(LLVMMSP430AsmPrinter
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMMSP430AsmPrinter MSP430CommonTableGen)
diff --git a/lib/Target/MSP430/InstPrinter/LLVMBuild.txt b/lib/Target/MSP430/InstPrinter/LLVMBuild.txt
index aeb863a..37b8c25 100644
--- a/lib/Target/MSP430/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/MSP430/InstPrinter/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MSP430AsmPrinter
parent = MSP430
required_libraries = MC Support
add_to_library_groups = MSP430
-
diff --git a/lib/Target/MSP430/LLVMBuild.txt b/lib/Target/MSP430/LLVMBuild.txt
index 024312b..51d9702 100644
--- a/lib/Target/MSP430/LLVMBuild.txt
+++ b/lib/Target/MSP430/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = InstPrinter MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = MSP430
@@ -27,4 +30,3 @@ name = MSP430CodeGen
parent = MSP430
required_libraries = AsmPrinter CodeGen Core MC MSP430AsmPrinter MSP430Desc MSP430Info SelectionDAG Support Target
add_to_library_groups = MSP430
-
diff --git a/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt b/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
index c2dd448..adc95c5 100644
--- a/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
@@ -3,12 +3,4 @@ add_llvm_library(LLVMMSP430Desc
MSP430MCAsmInfo.cpp
)
-add_llvm_library_dependencies(LLVMMSP430Desc
- LLVMMC
- LLVMMSP430AsmPrinter
- LLVMMSP430Info
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMMSP430Desc MSP430CommonTableGen)
diff --git a/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt b/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt
index 1890e9d..3319d93 100644
--- a/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MSP430Desc
parent = MSP430
required_libraries = MC MSP430AsmPrinter MSP430Info Support Target
add_to_library_groups = MSP430
-
diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp
index c99f4ab..e406ff2 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -29,7 +29,7 @@ using namespace llvm;
bool MSP430FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return (DisableFramePointerElim(MF) ||
+ return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
MF.getFrameInfo()->hasVarSizedObjects() ||
MFI->isFrameAddressTaken());
}
@@ -140,7 +140,7 @@ void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
while (MBBI != MBB.begin()) {
MachineBasicBlock::iterator PI = prior(MBBI);
unsigned Opc = PI->getOpcode();
- if (Opc != MSP430::POP16r && !PI->getDesc().isTerminator())
+ if (Opc != MSP430::POP16r && !PI->isTerminator())
break;
--MBBI;
}
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 5c94137..884d69b 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -122,8 +122,12 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
setOperationAction(ISD::CTTZ, MVT::i8, Expand);
setOperationAction(ISD::CTTZ, MVT::i16, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
setOperationAction(ISD::CTLZ, MVT::i8, Expand);
setOperationAction(ISD::CTLZ, MVT::i16, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand);
setOperationAction(ISD::CTPOP, MVT::i8, Expand);
setOperationAction(ISD::CTPOP, MVT::i16, Expand);
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index 81f766e..9d3c7e9 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -158,13 +158,12 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
}
bool MSP430InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isTerminator()) return false;
+ if (!MI->isTerminator()) return false;
// Conditional branch is a special case.
- if (MCID.isBranch() && !MCID.isBarrier())
+ if (MI->isBranch() && !MI->isBarrier())
return true;
- if (!MCID.isPredicable())
+ if (!MI->isPredicable())
return true;
return !isPredicated(MI);
}
@@ -189,7 +188,7 @@ bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// A terminator that isn't a branch can't easily be handled
// by this analysis.
- if (!I->getDesc().isBranch())
+ if (!I->isBranch())
return true;
// Cannot handle indirect branches.
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index fe185fb..a0fc3da 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -28,9 +28,10 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T,
StringRef TT,
StringRef CPU,
StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS),
// FIXME: Check TargetData string.
DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index 4fb060f..28d482a 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -39,7 +39,7 @@ class MSP430TargetMachine : public LLVMTargetMachine {
public:
MSP430TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
diff --git a/lib/Target/MSP430/TargetInfo/CMakeLists.txt b/lib/Target/MSP430/TargetInfo/CMakeLists.txt
index 1526946..f6b40ea 100644
--- a/lib/Target/MSP430/TargetInfo/CMakeLists.txt
+++ b/lib/Target/MSP430/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMMSP430Info
MSP430TargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMMSP430Info
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMMSP430Info MSP430CommonTableGen)
diff --git a/lib/Target/MSP430/TargetInfo/LLVMBuild.txt b/lib/Target/MSP430/TargetInfo/LLVMBuild.txt
index a745ea8..deafc2d 100644
--- a/lib/Target/MSP430/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/MSP430/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MSP430Info
parent = MSP430
required_libraries = MC Support Target
add_to_library_groups = MSP430
-
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index ac9cfc0..a13c0e8 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -29,19 +29,6 @@ add_llvm_target(MipsCodeGen
MipsSelectionDAGInfo.cpp
)
-add_llvm_library_dependencies(LLVMMipsCodeGen
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMMipsAsmPrinter
- LLVMMipsDesc
- LLVMMipsInfo
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Mips/InstPrinter/CMakeLists.txt b/lib/Target/Mips/InstPrinter/CMakeLists.txt
index c45b35d..3e9fbf1 100644
--- a/lib/Target/Mips/InstPrinter/CMakeLists.txt
+++ b/lib/Target/Mips/InstPrinter/CMakeLists.txt
@@ -4,9 +4,4 @@ add_llvm_library(LLVMMipsAsmPrinter
MipsInstPrinter.cpp
)
-add_llvm_library_dependencies(LLVMMipsAsmPrinter
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMMipsAsmPrinter MipsCommonTableGen)
diff --git a/lib/Target/Mips/InstPrinter/LLVMBuild.txt b/lib/Target/Mips/InstPrinter/LLVMBuild.txt
index d953a61..317057b 100644
--- a/lib/Target/Mips/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/Mips/InstPrinter/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MipsAsmPrinter
parent = Mips
required_libraries = MC Support
add_to_library_groups = Mips
-
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index f544d39..3e9c46a 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -96,10 +96,14 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) {
case MCSymbolRefExpr::VK_None: break;
case MCSymbolRefExpr::VK_Mips_GPREL: OS << "%gp_rel("; break;
case MCSymbolRefExpr::VK_Mips_GOT_CALL: OS << "%call16("; break;
+ case MCSymbolRefExpr::VK_Mips_GOT16: OS << "%got("; break;
case MCSymbolRefExpr::VK_Mips_GOT: OS << "%got("; break;
case MCSymbolRefExpr::VK_Mips_ABS_HI: OS << "%hi("; break;
case MCSymbolRefExpr::VK_Mips_ABS_LO: OS << "%lo("; break;
case MCSymbolRefExpr::VK_Mips_TLSGD: OS << "%tlsgd("; break;
+ case MCSymbolRefExpr::VK_Mips_TLSLDM: OS << "%tlsldm("; break;
+ case MCSymbolRefExpr::VK_Mips_DTPREL_HI:OS << "%dtprel_hi("; break;
+ case MCSymbolRefExpr::VK_Mips_DTPREL_LO:OS << "%dtprel_lo("; break;
case MCSymbolRefExpr::VK_Mips_GOTTPREL: OS << "%gottprel("; break;
case MCSymbolRefExpr::VK_Mips_TPREL_HI: OS << "%tprel_hi("; break;
case MCSymbolRefExpr::VK_Mips_TPREL_LO: OS << "%tprel_lo("; break;
diff --git a/lib/Target/Mips/LLVMBuild.txt b/lib/Target/Mips/LLVMBuild.txt
index e733b52..bcd32bc 100644
--- a/lib/Target/Mips/LLVMBuild.txt
+++ b/lib/Target/Mips/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = InstPrinter MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = Mips
@@ -28,4 +31,3 @@ name = MipsCodeGen
parent = Mips
required_libraries = AsmPrinter CodeGen Core MC MipsAsmPrinter MipsDesc MipsInfo SelectionDAG Support Target
add_to_library_groups = Mips
-
diff --git a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
index 2ceb5c9..0eb0a55 100644
--- a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
@@ -5,11 +5,4 @@ add_llvm_library(LLVMMipsDesc
MipsMCTargetDesc.cpp
)
-add_llvm_library_dependencies(LLVMMipsDesc
- LLVMMC
- LLVMMipsAsmPrinter
- LLVMMipsInfo
- LLVMSupport
- )
-
add_dependencies(LLVMMipsDesc MipsCommonTableGen)
diff --git a/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt b/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt
index d6f5dd2..29f5da6 100644
--- a/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MipsDesc
parent = Mips
required_libraries = MC MipsAsmPrinter MipsInfo Support
add_to_library_groups = Mips
-
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 7bc5fe4..60ff4fe 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -29,13 +29,19 @@
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
+// Prepare the fixup value for insertion into the instruction field.
static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
// Add/subtract and shift
switch (Kind) {
default:
+ return 0;
+ case FK_GPRel_4:
+ case FK_Data_4:
+ case Mips::fixup_Mips_LO16:
break;
case Mips::fixup_Mips_PC16:
// So far we are only using this type for branches.
@@ -52,25 +58,10 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
// address range.
Value >>= 2;
break;
- }
-
- // Mask off value for placement as an operand
- switch (Kind) {
- default:
- break;
- case FK_GPRel_4:
- case FK_Data_4:
- Value &= 0xffffffff;
- break;
- case Mips::fixup_Mips_26:
- Value &= 0x03ffffff;
- break;
- case Mips::fixup_Mips_LO16:
- case Mips::fixup_Mips_PC16:
- Value &= 0x0000ffff;
- break;
case Mips::fixup_Mips_HI16:
- Value >>= 16;
+ case Mips::fixup_Mips_GOT_Local:
+ // Get the higher 16-bits. Also add 1 if bit 15 is 1.
+ Value = (Value >> 16) + ((Value & 0x8000) != 0);
break;
}
@@ -96,42 +87,40 @@ public:
/// fixup kind as appropriate.
void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const {
- unsigned Kind = (unsigned)Fixup.getKind();
- Value = adjustFixupValue(Kind, Value);
+ MCFixupKind Kind = Fixup.getKind();
+ Value = adjustFixupValue((unsigned)Kind, Value);
if (!Value)
- return; // Doesn't change encoding.
+ return; // Doesn't change encoding.
unsigned Offset = Fixup.getOffset();
- switch (Kind) {
- default:
- llvm_unreachable("Unknown fixup kind!");
- case Mips::fixup_Mips_GOT16: // This will be fixed up at link time
- break;
- case FK_GPRel_4:
- case FK_Data_4:
- case Mips::fixup_Mips_26:
- case Mips::fixup_Mips_LO16:
- case Mips::fixup_Mips_PC16:
- case Mips::fixup_Mips_HI16:
- // For each byte of the fragment that the fixup touches, mask in
- // the fixup value. The Value has been "split up" into the appropriate
- // bitfields above.
- for (unsigned i = 0; i != 4; ++i) // FIXME - Need to support 2 and 8 bytes
- Data[Offset + i] += uint8_t((Value >> (i * 8)) & 0xff);
- break;
+ // FIXME: The below code will not work across endian models
+ // How many bytes/bits are we fixing up?
+ unsigned NumBytes = ((getFixupKindInfo(Kind).TargetSize-1)/8)+1;
+ uint64_t Mask = ((uint64_t)1 << getFixupKindInfo(Kind).TargetSize) - 1;
+
+ // Grab current value, if any, from bits.
+ uint64_t CurVal = 0;
+ for (unsigned i = 0; i != NumBytes; ++i)
+ CurVal |= ((uint8_t)Data[Offset + i]) << (i * 8);
+
+ CurVal = (CurVal & ~Mask) | ((CurVal + Value) & Mask);
+
+ // Write out the bytes back to the code/data bits.
+ // First the unaffected bits and then the fixup.
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ Data[Offset + i] = uint8_t((CurVal >> (i * 8)) & 0xff);
}
- }
+}
unsigned getNumFixupKinds() const { return Mips::NumTargetFixupKinds; }
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
const static MCFixupKindInfo Infos[Mips::NumTargetFixupKinds] = {
- // This table *must* be in the order that the fixup_* kinds are defined in
+ // This table *must* be in the same order as the fixup_* kinds in
// MipsFixupKinds.h.
//
// name offset bits flags
- { "fixup_Mips_NONE", 0, 0, 0 },
{ "fixup_Mips_16", 0, 16, 0 },
{ "fixup_Mips_32", 0, 32, 0 },
{ "fixup_Mips_REL32", 0, 32, 0 },
@@ -140,7 +129,8 @@ public:
{ "fixup_Mips_LO16", 0, 16, 0 },
{ "fixup_Mips_GPREL16", 0, 16, 0 },
{ "fixup_Mips_LITERAL", 0, 16, 0 },
- { "fixup_Mips_GOT16", 0, 16, 0 },
+ { "fixup_Mips_GOT_Global", 0, 16, 0 },
+ { "fixup_Mips_GOT_Local", 0, 16, 0 },
{ "fixup_Mips_PC16", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_Mips_CALL16", 0, 16, 0 },
{ "fixup_Mips_GPREL32", 0, 32, 0 },
@@ -173,6 +163,17 @@ public:
return false;
}
+ /// fixupNeedsRelaxation - Target specific predicate for whether a given
+ /// fixup requires the associated instruction to be relaxed.
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // FIXME.
+ assert(0 && "RelaxInstruction() unimplemented");
+ return false;
+ }
+
/// RelaxInstruction - Relax the instruction in the given fragment
/// to the next wider instruction.
///
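
The rewritten ApplyFixup above patches fixups generically instead of per-kind: it sizes a mask from the fixup's TargetSize, reads the covered bytes, adds the adjusted value into the masked field, and writes the bytes back (little-endian only, as the FIXME notes). A minimal standalone C++ sketch of that byte-patching scheme, with the helper name and test data invented here:

#include <cstdint>
#include <cstdio>

// Patch `value` into `data + offset`, touching only `targetSize` bits,
// assuming little-endian byte order.
static void applyFixupBits(uint8_t *data, unsigned offset,
                           unsigned targetSize, uint64_t value) {
  unsigned numBytes = (targetSize - 1) / 8 + 1;
  uint64_t mask = (targetSize == 64) ? ~0ULL
                                     : (((uint64_t)1 << targetSize) - 1);

  // Grab the current value, if any, from the bits.
  uint64_t cur = 0;
  for (unsigned i = 0; i != numBytes; ++i)
    cur |= (uint64_t)data[offset + i] << (i * 8);

  // Keep the unaffected bits; add the fixup value into the masked field.
  cur = (cur & ~mask) | ((cur + value) & mask);

  // Write the bytes back.
  for (unsigned i = 0; i != numBytes; ++i)
    data[offset + i] = (uint8_t)((cur >> (i * 8)) & 0xff);
}

int main() {
  uint8_t word[4] = {0x34, 0x12, 0x00, 0x3c}; // lui $zero, 0x1234 (little-endian)
  applyFixupBits(word, 0, 16, 0x0100);        // fold 0x0100 into the low 16 bits
  printf("%02x %02x %02x %02x\n", word[0], word[1], word[2], word[3]);
  return 0;
}
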
diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index cebfde0..00fc5df 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -31,8 +31,9 @@ namespace MipsII {
MO_NO_FLAG,
- /// MO_GOT - Represents the offset into the global offset table at which
+ /// MO_GOT16 - Represents the offset into the global offset table at which
/// the address the relocation entry symbol resides during execution.
+ MO_GOT16,
MO_GOT,
/// MO_GOT_CALL - Represents the offset into the global offset table at
@@ -55,6 +56,13 @@ namespace MipsII {
// Dynamic TLS).
MO_TLSGD,
+ /// MO_TLSLDM - Represents the offset into the global offset table at which
+ // the module ID and TLS block offset reside during execution (Local
+ // Dynamic TLS).
+ MO_TLSLDM,
+ MO_DTPREL_HI,
+ MO_DTPREL_LO,
+
/// MO_GOTTPREL - Represents the offset from the thread pointer (Initial
// Exec TLS).
MO_GOTTPREL,
@@ -180,6 +188,7 @@ inline static unsigned getMipsRegisterNumbering(unsigned RegEnum)
case Mips::D14:
return 28;
case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64:
+ case Mips::HWR29:
return 29;
case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64:
case Mips::D15:
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index 20890ed..a56c002 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -14,74 +14,82 @@
namespace llvm {
namespace Mips {
- enum Fixups {
- // fixup_Mips_xxx - R_MIPS_NONE
- fixup_Mips_NONE = FirstTargetFixupKind,
+ // Although most of the current fixup types reflect a unique relocation,
+ // one can have multiple fixup types for a given relocation and thus need
+ // to be uniquely named.
+ //
+ // This table *must* be in the same order as
+ // MCFixupKindInfo Infos[Mips::NumTargetFixupKinds]
+ // in MipsAsmBackend.cpp.
+ //
+ enum Fixups {
+ // Branch fixups resulting in R_MIPS_16.
+ fixup_Mips_16 = FirstTargetFixupKind,
- // fixup_Mips_xxx - R_MIPS_16.
- fixup_Mips_16,
+ // Pure 32 bit data fixup resulting in - R_MIPS_32.
+ fixup_Mips_32,
- // fixup_Mips_xxx - R_MIPS_32.
- fixup_Mips_32,
+ // Full 32 bit data relative data fixup resulting in - R_MIPS_REL32.
+ fixup_Mips_REL32,
- // fixup_Mips_xxx - R_MIPS_REL32.
- fixup_Mips_REL32,
+ // Jump 26 bit fixup resulting in - R_MIPS_26.
+ fixup_Mips_26,
- // fixup_Mips_xxx - R_MIPS_26.
- fixup_Mips_26,
+ // Pure upper 16 bit fixup resulting in - R_MIPS_HI16.
+ fixup_Mips_HI16,
- // fixup_Mips_xxx - R_MIPS_HI16.
- fixup_Mips_HI16,
+ // Pure lower 16 bit fixup resulting in - R_MIPS_LO16.
+ fixup_Mips_LO16,
- // fixup_Mips_xxx - R_MIPS_LO16.
- fixup_Mips_LO16,
+ // 16 bit fixup for GP offset resulting in - R_MIPS_GPREL16.
+ fixup_Mips_GPREL16,
- // fixup_Mips_xxx - R_MIPS_GPREL16.
- fixup_Mips_GPREL16,
+ // 16 bit literal fixup resulting in - R_MIPS_LITERAL.
+ fixup_Mips_LITERAL,
- // fixup_Mips_xxx - R_MIPS_LITERAL.
- fixup_Mips_LITERAL,
+ // Global symbol fixup resulting in - R_MIPS_GOT16.
+ fixup_Mips_GOT_Global,
- // fixup_Mips_xxx - R_MIPS_GOT16.
- fixup_Mips_GOT16,
+ // Local symbol fixup resulting in - R_MIPS_GOT16.
+ fixup_Mips_GOT_Local,
- // fixup_Mips_xxx - R_MIPS_PC16.
- fixup_Mips_PC16,
+ // PC relative branch fixup resulting in - R_MIPS_PC16.
+ fixup_Mips_PC16,
- // fixup_Mips_xxx - R_MIPS_CALL16.
- fixup_Mips_CALL16,
+ // resulting in - R_MIPS_CALL16.
+ fixup_Mips_CALL16,
- // fixup_Mips_xxx - R_MIPS_GPREL32.
- fixup_Mips_GPREL32,
+ // resulting in - R_MIPS_GPREL32.
+ fixup_Mips_GPREL32,
- // fixup_Mips_xxx - R_MIPS_SHIFT5.
- fixup_Mips_SHIFT5,
+ // resulting in - R_MIPS_SHIFT5.
+ fixup_Mips_SHIFT5,
- // fixup_Mips_xxx - R_MIPS_SHIFT6.
- fixup_Mips_SHIFT6,
+ // resulting in - R_MIPS_SHIFT6.
+ fixup_Mips_SHIFT6,
- // fixup_Mips_xxx - R_MIPS_64.
- fixup_Mips_64,
+ // Pure 64 bit data fixup resulting in - R_MIPS_64.
+ fixup_Mips_64,
- // fixup_Mips_xxx - R_MIPS_TLS_GD.
- fixup_Mips_TLSGD,
+ // resulting in - R_MIPS_TLS_GD.
+ fixup_Mips_TLSGD,
- // fixup_Mips_xxx - R_MIPS_TLS_GOTTPREL.
- fixup_Mips_GOTTPREL,
+ // resulting in - R_MIPS_TLS_GOTTPREL.
+ fixup_Mips_GOTTPREL,
- // fixup_Mips_xxx - R_MIPS_TLS_TPREL_HI16.
- fixup_Mips_TPREL_HI,
+ // resulting in - R_MIPS_TLS_TPREL_HI16.
+ fixup_Mips_TPREL_HI,
- // fixup_Mips_xxx - R_MIPS_TLS_TPREL_LO16.
- fixup_Mips_TPREL_LO,
+ // resulting in - R_MIPS_TLS_TPREL_LO16.
+ fixup_Mips_TPREL_LO,
- // fixup_Mips_xxx - yyy. // This should become R_MIPS_PC16
- fixup_Mips_Branch_PCRel,
+ // PC relative branch fixup resulting in - R_MIPS_PC16
+ fixup_Mips_Branch_PCRel,
- // Marker
- LastTargetFixupKind,
- NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
- };
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+ };
} // namespace Mips
} // namespace llvm
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 0c3cbb3..463dcfe 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -194,8 +194,11 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
case MCSymbolRefExpr::VK_Mips_GOT_CALL:
FixupKind = Mips::fixup_Mips_CALL16;
break;
+ case MCSymbolRefExpr::VK_Mips_GOT16:
+ FixupKind = Mips::fixup_Mips_GOT_Global;
+ break;
case MCSymbolRefExpr::VK_Mips_GOT:
- FixupKind = Mips::fixup_Mips_GOT16;
+ FixupKind = Mips::fixup_Mips_GOT_Local;
break;
case MCSymbolRefExpr::VK_Mips_ABS_HI:
FixupKind = Mips::fixup_Mips_HI16;
@@ -245,8 +248,8 @@ unsigned
MipsMCCodeEmitter::getSizeExtEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
assert(MI.getOperand(OpNo).isImm());
- unsigned szEncoding = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups);
- return szEncoding - 1;
+ unsigned SizeEncoding = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups);
+ return SizeEncoding - 1;
}
// FIXME: should be called getMSBEncoding
@@ -256,10 +259,10 @@ MipsMCCodeEmitter::getSizeInsEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
assert(MI.getOperand(OpNo-1).isImm());
assert(MI.getOperand(OpNo).isImm());
- unsigned pos = getMachineOpValue(MI, MI.getOperand(OpNo-1), Fixups);
- unsigned sz = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups);
+ unsigned Position = getMachineOpValue(MI, MI.getOperand(OpNo-1), Fixups);
+ unsigned Size = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups);
- return pos + sz - 1;
+ return Position + Size - 1;
}
#include "MipsGenMCCodeEmitter.inc"
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 39c2c16..e9e0f60 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -79,9 +79,9 @@ def FeatureMips64r2 : SubtargetFeature<"mips64r2", "MipsArchVersion",
class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, MipsGenericItineraries, Features>;
-def : Proc<"mips32r1", [FeatureMips32]>;
-def : Proc<"4ke", [FeatureMips32r2]>;
-def : Proc<"mips64r1", [FeatureMips64]>;
+def : Proc<"mips32", [FeatureMips32]>;
+def : Proc<"mips32r2", [FeatureMips32r2]>;
+def : Proc<"mips64", [FeatureMips64]>;
def : Proc<"mips64r2", [FeatureMips64r2]>;
def MipsAsmWriter : AsmWriter {
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index b0fb4fa..2996986 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -25,7 +25,7 @@ def uimm16_64 : Operand<i64> {
// Transformation Function - get Imm - 32.
def Subtract32 : SDNodeXForm<imm, [{
- return getI32Imm((unsigned)N->getZExtValue() - 32);
+ return getImm(N, (unsigned)N->getZExtValue() - 32);
}]>;
// shamt field must fit in 5 bits.
@@ -36,6 +36,19 @@ def imm32_63 : ImmLeaf<i32,
[{return (int32_t)Imm >= 32 && (int32_t)Imm < 64;}],
Subtract32>;
+// Is a 32-bit int.
+def immSExt32 : ImmLeaf<i64, [{return isInt<32>(Imm);}]>;
+
+// Transformation Function - get the higher 16 bits.
+def HIGHER : SDNodeXForm<imm, [{
+ return getImm(N, (N->getZExtValue() >> 32) & 0xFFFF);
+}]>;
+
+// Transformation Function - get the highest 16 bits.
+def HIGHEST : SDNodeXForm<imm, [{
+ return getImm(N, (N->getZExtValue() >> 48) & 0xFFFF);
+}]>;
+
//===----------------------------------------------------------------------===//
// Instructions specific format
//===----------------------------------------------------------------------===//
@@ -206,6 +219,17 @@ let Uses = [SP_64] in
def DynAlloc64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>,
Requires<[IsN64]>;
+def RDHWR64 : ReadHardware<CPU64Regs, HWRegs64>;
+
+def DEXT : ExtBase<3, "dext", CPU64Regs>;
+def DINS : InsBase<7, "dins", CPU64Regs>;
+
+def DSLL64_32 : FR<0x3c, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
+ "dsll32\t$rd, $rt, 0", [], IIAlu>;
+
+def SLL64_32 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
+ "sll\t$rd, $rt, 0", [], IIAlu>;
+
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
//===----------------------------------------------------------------------===//
@@ -216,9 +240,15 @@ def : Pat<(i64 immSExt16:$in),
def : Pat<(i64 immZExt16:$in),
(ORi64 ZERO_64, imm:$in)>;
+// 32-bit immediates
+def : Pat<(i64 immSExt32:$imm),
+ (ORi64 (LUi64 (HI16 imm:$imm)), (LO16 imm:$imm))>;
+
// Arbitrary immediates
def : Pat<(i64 imm:$imm),
- (ORi64 (LUi64 (HI16 imm:$imm)), (LO16 imm:$imm))>;
+ (ORi64 (DSLL (ORi64 (DSLL (ORi64 (LUi64 (HIGHEST imm:$imm)),
+ (HIGHER imm:$imm)), 16), (HI16 imm:$imm)), 16),
+ (LO16 imm:$imm))>;
// extended loads
let Predicates = [NotN64] in {
@@ -236,11 +266,13 @@ def : Pat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>;
def : Pat<(MipsHi tblockaddress:$in), (LUi64 tblockaddress:$in)>;
def : Pat<(MipsHi tjumptable:$in), (LUi64 tjumptable:$in)>;
def : Pat<(MipsHi tconstpool:$in), (LUi64 tconstpool:$in)>;
+def : Pat<(MipsHi tglobaltlsaddr:$in), (LUi64 tglobaltlsaddr:$in)>;
def : Pat<(MipsLo tglobaladdr:$in), (DADDiu ZERO_64, tglobaladdr:$in)>;
def : Pat<(MipsLo tblockaddress:$in), (DADDiu ZERO_64, tblockaddress:$in)>;
def : Pat<(MipsLo tjumptable:$in), (DADDiu ZERO_64, tjumptable:$in)>;
def : Pat<(MipsLo tconstpool:$in), (DADDiu ZERO_64, tconstpool:$in)>;
+def : Pat<(MipsLo tglobaltlsaddr:$in), (DADDiu ZERO_64, tglobaltlsaddr:$in)>;
def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaladdr:$lo)),
(DADDiu CPU64Regs:$hi, tglobaladdr:$lo)>;
@@ -250,6 +282,15 @@ def : Pat<(add CPU64Regs:$hi, (MipsLo tjumptable:$lo)),
(DADDiu CPU64Regs:$hi, tjumptable:$lo)>;
def : Pat<(add CPU64Regs:$hi, (MipsLo tconstpool:$lo)),
(DADDiu CPU64Regs:$hi, tconstpool:$lo)>;
+def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaltlsaddr:$lo)),
+ (DADDiu CPU64Regs:$hi, tglobaltlsaddr:$lo)>;
+
+def : WrapperPat<tglobaladdr, DADDiu, GP_64>;
+def : WrapperPat<tconstpool, DADDiu, GP_64>;
+def : WrapperPat<texternalsym, DADDiu, GP_64>;
+def : WrapperPat<tblockaddress, DADDiu, GP_64>;
+def : WrapperPat<tjumptable, DADDiu, GP_64>;
+def : WrapperPat<tglobaltlsaddr, DADDiu, GP_64>;
defm : BrcondPats<CPU64Regs, BEQ64, BNE64, SLT64, SLTu64, SLTi64, SLTiu64,
ZERO_64>;
@@ -268,3 +309,6 @@ def : Pat<(MipsDynAlloc addr:$f), (DynAlloc64 addr:$f)>, Requires<[IsN64]>;
def : Pat<(i32 (trunc CPU64Regs:$src)),
(SLL (EXTRACT_SUBREG CPU64Regs:$src, sub_32), 0)>, Requires<[IsN64]>;
+// 32-to-64-bit extension
+def : Pat<(i64 (anyext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
+def : Pat<(i64 (zext CPURegs:$src)), (DSRL32 (DSLL64_32 CPURegs:$src), 0)>;
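
The new arbitrary-immediate pattern above builds a 64-bit constant from four 16-bit chunks (LO16, HI16, and the new HIGHER and HIGHEST transforms) with an lui/ori/dsll sequence. A short C++ check of that decomposition, written here only as an illustration of the arithmetic:

#include <cassert>
#include <cstdint>

// Rebuild a 64-bit immediate the way the (LUi64/ORi64/DSLL) pattern does:
// lui HIGHEST, or in HIGHER, shift by 16, or in HI16, shift by 16, or in LO16.
static uint64_t materializeImm64(uint64_t imm) {
  uint64_t highest = (imm >> 48) & 0xFFFF; // HIGHEST transform
  uint64_t higher  = (imm >> 32) & 0xFFFF; // HIGHER transform
  uint64_t hi16    = (imm >> 16) & 0xFFFF; // HI16
  uint64_t lo16    = imm & 0xFFFF;         // LO16

  uint64_t v = highest << 16; // LUi64
  v |= higher;                // ORi64
  v <<= 16;                   // DSLL 16
  v |= hi16;                  // ORi64
  v <<= 16;                   // DSLL 16
  v |= lo16;                  // ORi64
  return v;
}

int main() {
  assert(materializeImm64(0x123456789ABCDEF0ULL) == 0x123456789ABCDEF0ULL);
  assert(materializeImm64(0xFFFFFFFF80000000ULL) == 0xFFFFFFFF80000000ULL);
  return 0;
}
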
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index d27e3ab..a5505d3 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -96,19 +96,17 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (!OutStreamer.hasRawTextSupport()) {
// Lower CPLOAD and CPRESTORE
- if (Opc == Mips::CPLOAD) {
+ if (Opc == Mips::CPLOAD)
MCInstLowering.LowerCPLOAD(MI, MCInsts);
- for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); I
- != MCInsts.end(); ++I)
+ else if (Opc == Mips::CPRESTORE)
+ MCInstLowering.LowerCPRESTORE(MI, MCInsts);
+
+ if (!MCInsts.empty()) {
+ for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin();
+ I != MCInsts.end(); ++I)
OutStreamer.EmitInstruction(*I);
return;
}
-
- if (Opc == Mips::CPRESTORE) {
- MCInstLowering.LowerCPRESTORE(MI, TmpInst0);
- OutStreamer.EmitInstruction(TmpInst0);
- return;
- }
}
OutStreamer.EmitInstruction(TmpInst0);
@@ -317,9 +315,9 @@ bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
// Otherwise, check the last instruction.
// Check if the last terminator is an unconditional branch.
MachineBasicBlock::const_iterator I = Pred->end();
- while (I != Pred->begin() && !(--I)->getDesc().isTerminator()) ;
+ while (I != Pred->begin() && !(--I)->isTerminator()) ;
- return !I->getDesc().isBarrier();
+ return !I->isBarrier();
}
// Print out an operand for an inline asm expression.
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp
index a8f29ae..6b26e24 100644
--- a/lib/Target/Mips/MipsCodeEmitter.cpp
+++ b/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -144,7 +144,7 @@ bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB){
MCE.StartMachineBasicBlock(MBB);
- for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
I != E; ++I)
emitInstruction(*I);
}
@@ -161,7 +161,7 @@ unsigned MipsCodeEmitter::getRelocation(const MachineInstr &MI,
if (Form == MipsII::FrmJ)
return Mips::reloc_mips_26;
if ((Form == MipsII::FrmI || Form == MipsII::FrmFI)
- && MI.getDesc().isBranch())
+ && MI.isBranch())
return Mips::reloc_mips_branch;
if (Form == MipsII::FrmI && MI.getOpcode() == Mips::LUi)
return Mips::reloc_mips_hi;
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index be3b7a0..1d9e9b0 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -96,7 +96,7 @@ runOnMachineBasicBlock(MachineBasicBlock &MBB) {
LastFiller = MBB.end();
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
- if (I->getDesc().hasDelaySlot()) {
+ if (I->hasDelaySlot()) {
++FilledSlots;
Changed = true;
@@ -146,7 +146,7 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB,
|| I->isInlineAsm()
|| I->isLabel()
|| FI == LastFiller
- || I->getDesc().isPseudo()
+ || I->isPseudo()
//
// Should not allow:
// ERET, DERET or WAIT, PAUSE. Need to add these to instruction
@@ -174,16 +174,15 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
if (candidate->isImplicitDef() || candidate->isKill())
return true;
- MCInstrDesc MCID = candidate->getDesc();
// Loads or stores cannot be moved past a store to the delay slot
// and stores cannot be moved past a load.
- if (MCID.mayLoad()) {
+ if (candidate->mayLoad()) {
if (sawStore)
return true;
sawLoad = true;
}
- if (MCID.mayStore()) {
+ if (candidate->mayStore()) {
if (sawStore)
return true;
sawStore = true;
@@ -191,7 +190,7 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
return true;
}
- assert((!MCID.isCall() && !MCID.isReturn()) &&
+ assert((!candidate->isCall() && !candidate->isReturn()) &&
"Cannot put calls or returns in delay slot.");
for (unsigned i = 0, e = candidate->getNumOperands(); i!= e; ++i) {
@@ -221,11 +220,11 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
SmallSet<unsigned, 32>& RegUses) {
// If MI is a call or return, just examine the explicit non-variadic operands.
MCInstrDesc MCID = MI->getDesc();
- unsigned e = MCID.isCall() || MCID.isReturn() ? MCID.getNumOperands() :
- MI->getNumOperands();
+ unsigned e = MI->isCall() || MI->isReturn() ? MCID.getNumOperands() :
+ MI->getNumOperands();
// Add RA to RegDefs to prevent users of RA from going into delay slot.
- if (MCID.isCall())
+ if (MI->isCall())
RegDefs.insert(Mips::RA);
for (unsigned i = 0; i != e; ++i) {
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index 36aef99..2466545 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -85,8 +85,8 @@ using namespace llvm;
// if frame pointer elimination is disabled.
bool MipsFrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()
- || MFI->isFrameAddressTaken();
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken();
}
bool MipsFrameLowering::targetHandlesStackFrameRounding() const {
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 9c831ed..b17239d 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -86,10 +86,9 @@ private:
// Complex Pattern.
bool SelectAddr(SDValue N, SDValue &Base, SDValue &Offset);
- // getI32Imm - Return a target constant with the specified
- // value, of type i32.
- inline SDValue getI32Imm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
+ // getImm - Return a target constant with the specified value.
+ inline SDValue getImm(const SDNode *Node, unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
}
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
@@ -122,21 +121,16 @@ SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
}
// on PIC code Load GA
- if (TM.getRelocationModel() == Reloc::PIC_) {
- if (Addr.getOpcode() == MipsISD::WrapperPIC) {
- Base = CurDAG->getRegister(GPReg, ValTy);
- Offset = Addr.getOperand(0);
- return true;
- }
- } else {
+ if (Addr.getOpcode() == MipsISD::Wrapper) {
+ Base = CurDAG->getRegister(GPReg, ValTy);
+ Offset = Addr.getOperand(0);
+ return true;
+ }
+
+ if (TM.getRelocationModel() != Reloc::PIC_) {
if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
Addr.getOpcode() == ISD::TargetGlobalAddress))
return false;
- else if (Addr.getOpcode() == ISD::TargetGlobalTLSAddress) {
- Base = CurDAG->getRegister(GPReg, ValTy);
- Offset = Addr;
- return true;
- }
}
// Addresses of the form FI+const or FI|const
@@ -310,13 +304,24 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
}
case MipsISD::ThreadPointer: {
- unsigned SrcReg = Mips::HWR29;
- unsigned DestReg = Mips::V1;
- SDNode *Rdhwr = CurDAG->getMachineNode(Mips::RDHWR, Node->getDebugLoc(),
- Node->getValueType(0), CurDAG->getRegister(SrcReg, MVT::i32));
+ EVT PtrVT = TLI.getPointerTy();
+ unsigned RdhwrOpc, SrcReg, DestReg;
+
+ if (PtrVT == MVT::i32) {
+ RdhwrOpc = Mips::RDHWR;
+ SrcReg = Mips::HWR29;
+ DestReg = Mips::V1;
+ } else {
+ RdhwrOpc = Mips::RDHWR64;
+ SrcReg = Mips::HWR29_64;
+ DestReg = Mips::V1_64;
+ }
+
+ SDNode *Rdhwr = CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(),
+ Node->getValueType(0), CurDAG->getRegister(SrcReg, PtrVT));
SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, DestReg,
SDValue(Rdhwr, 0));
- SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, MVT::i32);
+ SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, PtrVT);
ReplaceUses(SDValue(Node, 0), ResNode);
return ResNode.getNode();
}
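
For reference, the node selected above computes the same value user code obtains with the canonical MIPS TLS sequence: rdhwr of hardware register $29 (the user-local register), copied into $v1 or $v1_64. A hedged sketch using GCC-style inline assembly; the "=v" constraint and the .set directives follow common libc practice and are not part of this patch:

    #include <cstdint>

    // Reads hardware register $29 into a v-register, which is what
    // MipsISD::ThreadPointer lowers to via RDHWR/RDHWR64 above.
    static inline uintptr_t readThreadPointer() {
      uintptr_t TP;
      __asm__ __volatile__(".set push\n\t"
                           ".set mips32r2\n\t"
                           "rdhwr %0, $29\n\t"
                           ".set pop"
                           : "=v"(TP));
      return TP;
    }
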
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index b5a15cf..c9b657c 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -40,11 +40,11 @@ using namespace llvm;
// mask (Pos), and return true.
// For example, if I is 0x003ff800, (Pos, Size) = (11, 11).
static bool IsShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
- if (!isUInt<32>(I) || !isShiftedMask_32(I))
+ if (!isShiftedMask_64(I))
return false;
- Size = CountPopulation_32(I);
- Pos = CountTrailingZeros_32(I);
+ Size = CountPopulation_64(I);
+ Pos = CountTrailingZeros_64(I);
return true;
}
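
The widened helper now accepts any 64-bit shifted mask instead of only 32-bit ones. A self-contained sketch of the same test, using GCC/Clang builtins in place of the LLVM MathExtras routines (an assumption, not code from this patch):

    #include <cassert>
    #include <cstdint>

    static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
      if (I == 0)
        return false;
      Pos = __builtin_ctzll(I);             // CountTrailingZeros_64
      uint64_t Shifted = I >> Pos;
      if (Shifted & (Shifted + 1))          // must be a contiguous run of ones
        return false;
      Size = __builtin_popcountll(I);       // CountPopulation_64
      return true;
    }

    int main() {
      uint64_t Pos, Size;
      assert(isShiftedMask(0x003ff800, Pos, Size) && Pos == 11 && Size == 11);
      return 0;
    }
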
@@ -54,9 +54,6 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::Hi: return "MipsISD::Hi";
case MipsISD::Lo: return "MipsISD::Lo";
case MipsISD::GPRel: return "MipsISD::GPRel";
- case MipsISD::TlsGd: return "MipsISD::TlsGd";
- case MipsISD::TprelHi: return "MipsISD::TprelHi";
- case MipsISD::TprelLo: return "MipsISD::TprelLo";
case MipsISD::ThreadPointer: return "MipsISD::ThreadPointer";
case MipsISD::Ret: return "MipsISD::Ret";
case MipsISD::FPBrcond: return "MipsISD::FPBrcond";
@@ -72,7 +69,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::DivRemU: return "MipsISD::DivRemU";
case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64";
case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64";
- case MipsISD::WrapperPIC: return "MipsISD::WrapperPIC";
+ case MipsISD::Wrapper: return "MipsISD::Wrapper";
case MipsISD::DynAlloc: return "MipsISD::DynAlloc";
case MipsISD::Sync: return "MipsISD::Sync";
case MipsISD::Ext: return "MipsISD::Ext";
@@ -129,7 +126,9 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
@@ -157,6 +156,10 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i64, Expand);
@@ -555,20 +558,20 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG,
return SDValue();
SDValue ShiftRight = N->getOperand(0), Mask = N->getOperand(1);
-
+ unsigned ShiftRightOpc = ShiftRight.getOpcode();
+
// Op's first operand must be a shift right.
- if (ShiftRight.getOpcode() != ISD::SRA && ShiftRight.getOpcode() != ISD::SRL)
+ if (ShiftRightOpc != ISD::SRA && ShiftRightOpc != ISD::SRL)
return SDValue();
// The second operand of the shift must be an immediate.
- uint64_t Pos;
ConstantSDNode *CN;
if (!(CN = dyn_cast<ConstantSDNode>(ShiftRight.getOperand(1))))
return SDValue();
- Pos = CN->getZExtValue();
-
+ uint64_t Pos = CN->getZExtValue();
uint64_t SMPos, SMSize;
+
// Op's second operand must be a shifted mask.
if (!(CN = dyn_cast<ConstantSDNode>(Mask)) ||
!IsShiftedMask(CN->getZExtValue(), SMPos, SMSize))
@@ -576,10 +579,11 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG,
// Return if the shifted mask does not start at bit 0 or the sum of its size
// and Pos exceeds the word's size.
- if (SMPos != 0 || Pos + SMSize > 32)
+ EVT ValTy = N->getValueType(0);
+ if (SMPos != 0 || Pos + SMSize > ValTy.getSizeInBits())
return SDValue();
- return DAG.getNode(MipsISD::Ext, N->getDebugLoc(), MVT::i32,
+ return DAG.getNode(MipsISD::Ext, N->getDebugLoc(), ValTy,
ShiftRight.getOperand(0),
DAG.getConstant(Pos, MVT::i32),
DAG.getConstant(SMSize, MVT::i32));
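
The combine above rewrites (and (srl x, pos), mask) into MipsISD::Ext whenever mask is a shifted mask starting at bit 0 and pos + size stays within the value type. The scalar identity it relies on, sketched for i32 (illustrative only):

    #include <cassert>
    #include <cstdint>

    // Bit-field extract of Size bits starting at Pos - what 'ext' computes.
    static uint32_t extField(uint32_t X, unsigned Pos, unsigned Size) {
      uint32_t Mask = (Size == 32) ? ~0u : ((1u << Size) - 1);
      return (X >> Pos) & Mask;
    }

    int main() {
      // Mirrors the 0x003ff800 example: 11 bits starting at bit 11.
      assert(extField(0x12345678u, 11, 11) == ((0x12345678u >> 11) & 0x7ffu));
      return 0;
    }
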
@@ -630,10 +634,11 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG,
// Return if the shift amount and the first bit position of mask are not the
// same.
- if (Shamt != SMPos0)
+ EVT ValTy = N->getValueType(0);
+ if ((Shamt != SMPos0) || (SMPos0 + SMSize0 > ValTy.getSizeInBits()))
return SDValue();
- return DAG.getNode(MipsISD::Ins, N->getDebugLoc(), MVT::i32,
+ return DAG.getNode(MipsISD::Ins, N->getDebugLoc(), ValTy,
Shl.getOperand(0),
DAG.getConstant(SMPos0, MVT::i32),
DAG.getConstant(SMSize0, MVT::i32),
@@ -1485,9 +1490,9 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
(GV->hasLocalLinkage() && !isa<Function>(GV)));
unsigned GotFlag = IsN64 ?
(HasGotOfst ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT_DISP) :
- MipsII::MO_GOT;
+ (HasGotOfst ? MipsII::MO_GOT : MipsII::MO_GOT16);
SDValue GA = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0, GotFlag);
- GA = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, GA);
+ GA = DAG.getNode(MipsISD::Wrapper, dl, ValTy, GA);
SDValue ResNode = DAG.getLoad(ValTy, dl,
DAG.getEntryNode(), GA, MachinePointerInfo(),
false, false, false, 0);
@@ -1523,7 +1528,7 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op,
unsigned GOTFlag = IsN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
unsigned OFSTFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
SDValue BAGOTOffset = DAG.getBlockAddress(BA, ValTy, true, GOTFlag);
- BAGOTOffset = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, BAGOTOffset);
+ BAGOTOffset = DAG.getNode(MipsISD::Wrapper, dl, ValTy, BAGOTOffset);
SDValue BALOOffset = DAG.getBlockAddress(BA, ValTy, true, OFSTFlag);
SDValue Load = DAG.getLoad(ValTy, dl,
DAG.getEntryNode(), BAGOTOffset,
@@ -1535,9 +1540,9 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op,
SDValue MipsTargetLowering::
LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
{
- // If the relocation model is PIC, use the General Dynamic TLS Model,
- // otherwise use the Initial Exec or Local Exec TLS Model.
- // TODO: implement Local Dynamic TLS model
+ // If the relocation model is PIC, use the General Dynamic TLS Model or
+ // Local Dynamic TLS model, otherwise use the Initial Exec or
+ // Local Exec TLS Model.
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
DebugLoc dl = GA->getDebugLoc();
@@ -1546,45 +1551,59 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
// General Dynamic TLS Model
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32,
- 0, MipsII::MO_TLSGD);
- SDValue Tlsgd = DAG.getNode(MipsISD::TlsGd, dl, MVT::i32, TGA);
- SDValue GP = DAG.getRegister(Mips::GP, MVT::i32);
- SDValue Argument = DAG.getNode(ISD::ADD, dl, MVT::i32, GP, Tlsgd);
+ bool LocalDynamic = GV->hasInternalLinkage();
+ unsigned Flag = LocalDynamic ? MipsII::MO_TLSLDM : MipsII::MO_TLSGD;
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, Flag);
+ SDValue Argument = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, TGA);
+ unsigned PtrSize = PtrVT.getSizeInBits();
+ IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize);
+
+ SDValue TlsGetAddr = DAG.getExternalSymbol("__tls_get_addr", PtrVT);
ArgListTy Args;
ArgListEntry Entry;
Entry.Node = Argument;
- Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
+ Entry.Ty = PtrTy;
Args.push_back(Entry);
+
std::pair<SDValue, SDValue> CallResult =
- LowerCallTo(DAG.getEntryNode(),
- (Type *) Type::getInt32Ty(*DAG.getContext()),
- false, false, false, false, 0, CallingConv::C, false, true,
- DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG,
- dl);
-
- return CallResult.first;
+ LowerCallTo(DAG.getEntryNode(), PtrTy,
+ false, false, false, false, 0, CallingConv::C, false, true,
+ TlsGetAddr, Args, DAG, dl);
+
+ SDValue Ret = CallResult.first;
+
+ if (!LocalDynamic)
+ return Ret;
+
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ MipsII::MO_DTPREL_HI);
+ SDValue Hi = DAG.getNode(MipsISD::Hi, dl, PtrVT, TGAHi);
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ MipsII::MO_DTPREL_LO);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, TGALo);
+ SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Ret);
+ return DAG.getNode(ISD::ADD, dl, PtrVT, Add, Lo);
}
SDValue Offset;
if (GV->isDeclaration()) {
// Initial Exec TLS Model
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
MipsII::MO_GOTTPREL);
- Offset = DAG.getLoad(MVT::i32, dl,
+ TGA = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, TGA);
+ Offset = DAG.getLoad(PtrVT, dl,
DAG.getEntryNode(), TGA, MachinePointerInfo(),
false, false, false, 0);
} else {
// Local Exec TLS Model
- SDVTList VTs = DAG.getVTList(MVT::i32);
- SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
MipsII::MO_TPREL_HI);
- SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
MipsII::MO_TPREL_LO);
- SDValue Hi = DAG.getNode(MipsISD::TprelHi, dl, VTs, &TGAHi, 1);
- SDValue Lo = DAG.getNode(MipsISD::TprelLo, dl, MVT::i32, TGALo);
- Offset = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo);
+ SDValue Hi = DAG.getNode(MipsISD::Hi, dl, PtrVT, TGAHi);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, TGALo);
+ Offset = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
}
SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, dl, PtrVT);
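
In the Local Exec path the final address is simply ThreadPointer + %tprel_hi + %tprel_lo, now built with the generic MipsISD::Hi/Lo nodes. A hedged sketch of the arithmetic that Hi/Lo pair performs; the pointer and offset values below are made up for illustration:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t TP    = 0x7f0000010000;  // hypothetical thread pointer (rdhwr $29)
      uint64_t TPRel = 0x1234;          // hypothetical offset of the TLS variable
      uint64_t Hi = ((TPRel + 0x8000) >> 16) << 16;   // %tprel_hi, carry-adjusted
      int64_t  Lo = (int16_t)(TPRel & 0xffff);        // %tprel_lo, sign-extended
      assert(Hi + Lo == TPRel);         // the Hi/Lo pair reassembles the offset
      uint64_t Addr = TP + Hi + Lo;     // ThreadPointer + Hi + Lo, as lowered above
      (void)Addr;
      return 0;
    }
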
@@ -1594,34 +1613,29 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
SDValue MipsTargetLowering::
LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
{
- SDValue ResNode;
- SDValue HiPart;
+ SDValue HiPart, JTI, JTILo;
// FIXME there isn't actually debug info here
DebugLoc dl = Op.getDebugLoc();
bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
- unsigned char OpFlag = IsPIC ? MipsII::MO_GOT : MipsII::MO_ABS_HI;
-
EVT PtrVT = Op.getValueType();
- JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);
-
- if (!IsPIC) {
- SDValue Ops[] = { JTI };
- HiPart = DAG.getNode(MipsISD::Hi, dl, DAG.getVTList(MVT::i32), Ops, 1);
+ if (!IsPIC && !IsN64) {
+ JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MipsII::MO_ABS_HI);
+ HiPart = DAG.getNode(MipsISD::Hi, dl, PtrVT, JTI);
+ JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MipsII::MO_ABS_LO);
} else {// Emit Load from Global Pointer
- JTI = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, JTI);
- HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI,
- MachinePointerInfo(),
- false, false, false, 0);
+ unsigned GOTFlag = IsN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
+ unsigned OfstFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
+ JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, GOTFlag);
+ JTI = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, JTI);
+ HiPart = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), JTI,
+ MachinePointerInfo(), false, false, false, 0);
+ JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OfstFlag);
}
- SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
- MipsII::MO_ABS_LO);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTILo);
- ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
-
- return ResNode;
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, JTILo);
+ return DAG.getNode(ISD::ADD, dl, PtrVT, HiPart, Lo);
}
SDValue MipsTargetLowering::
@@ -1657,7 +1671,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
unsigned OFSTFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
SDValue CP = DAG.getTargetConstantPool(C, ValTy, N->getAlignment(),
N->getOffset(), GOTFlag);
- CP = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, CP);
+ CP = DAG.getNode(MipsISD::Wrapper, dl, ValTy, CP);
SDValue Load = DAG.getLoad(ValTy, dl, DAG.getEntryNode(),
CP, MachinePointerInfo::getConstantPool(),
false, false, false, 0);
@@ -1685,21 +1699,29 @@ SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(SV),
false, false, 0);
}
-
-static SDValue LowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG) {
+
+// Called if the size of integer registers is large enough to hold the whole
+// floating point number.
+static SDValue LowerFCOPYSIGNLargeIntReg(SDValue Op, SelectionDAG &DAG) {
// FIXME: Use ext/ins instructions if target architecture is Mips32r2.
+ EVT ValTy = Op.getValueType();
+ EVT IntValTy = MVT::getIntegerVT(ValTy.getSizeInBits());
+ uint64_t Mask = (uint64_t)1 << (ValTy.getSizeInBits() - 1);
DebugLoc dl = Op.getDebugLoc();
- SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(0));
- SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(1));
- SDValue And0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op0,
- DAG.getConstant(0x7fffffff, MVT::i32));
- SDValue And1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op1,
- DAG.getConstant(0x80000000, MVT::i32));
- SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, And0, And1);
- return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Result);
+ SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntValTy, Op.getOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntValTy, Op.getOperand(1));
+ SDValue And0 = DAG.getNode(ISD::AND, dl, IntValTy, Op0,
+ DAG.getConstant(Mask - 1, IntValTy));
+ SDValue And1 = DAG.getNode(ISD::AND, dl, IntValTy, Op1,
+ DAG.getConstant(Mask, IntValTy));
+ SDValue Result = DAG.getNode(ISD::OR, dl, IntValTy, And0, And1);
+ return DAG.getNode(ISD::BITCAST, dl, ValTy, Result);
}
-static SDValue LowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool isLittle) {
+// Called if the size of integer registers is not large enough to hold the whole
+// floating point number (e.g. f64 & 32-bit integer register).
+static SDValue
+LowerFCOPYSIGNSmallIntReg(SDValue Op, SelectionDAG &DAG, bool isLittle) {
// FIXME:
// Use ext/ins instructions if target architecture is Mips32r2.
// Eliminate redundant mfc1 and mtc1 instructions.
@@ -1734,10 +1756,10 @@ SDValue MipsTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG)
assert(Ty == MVT::f32 || Ty == MVT::f64);
- if (Ty == MVT::f32)
- return LowerFCOPYSIGN32(Op, DAG);
+ if (Ty == MVT::f32 || HasMips64)
+ return LowerFCOPYSIGNLargeIntReg(Op, DAG);
else
- return LowerFCOPYSIGN64(Op, DAG, Subtarget->isLittle());
+ return LowerFCOPYSIGNSmallIntReg(Op, DAG, Subtarget->isLittle());
}
SDValue MipsTargetLowering::
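
When the whole float fits in an integer register, the lowering above reduces copysign to two ANDs and an OR on the bit pattern, using Mask - 1 to keep the magnitude and Mask to keep the sign. A minimal sketch for f32 in a 32-bit GPR, assuming IEEE-754 single precision:

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <cstring>

    static float copysignBits(float Mag, float Sgn) {
      uint32_t M, S;
      std::memcpy(&M, &Mag, sizeof(M));
      std::memcpy(&S, &Sgn, sizeof(S));
      uint32_t Mask = 0x80000000u;                  // sign bit of an f32
      uint32_t R = (M & (Mask - 1)) | (S & Mask);   // AND, AND, OR - as in the DAG
      float Res;
      std::memcpy(&Res, &R, sizeof(Res));
      return Res;
    }

    int main() {
      assert(copysignBits(3.5f, -1.0f) == std::copysign(3.5f, -1.0f));
      return 0;
    }
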
@@ -2328,7 +2350,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
// node so that legalize doesn't hack it.
unsigned char OpFlag;
bool IsPICCall = (IsN64 || IsPIC); // true if calls are translated to jalr $25
- bool LoadSymAddr = false;
+ bool GlobalOrExternal = false;
SDValue CalleeLo;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
@@ -2345,7 +2367,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
getPointerTy(), 0, OpFlag);
}
- LoadSymAddr = true;
+ GlobalOrExternal = true;
}
else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
if (IsN64 || (!IsO32 && IsPIC))
@@ -2356,16 +2378,16 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
OpFlag = MipsII::MO_GOT_CALL;
Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
getPointerTy(), OpFlag);
- LoadSymAddr = true;
+ GlobalOrExternal = true;
}
SDValue InFlag;
// Create nodes that load address of callee and copy it to T9
if (IsPICCall) {
- if (LoadSymAddr) {
+ if (GlobalOrExternal) {
// Load callee address
- Callee = DAG.getNode(MipsISD::WrapperPIC, dl, getPointerTy(), Callee);
+ Callee = DAG.getNode(MipsISD::Wrapper, dl, getPointerTy(), Callee);
SDValue LoadValue = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
Callee, MachinePointerInfo::getGOT(),
false, false, false, 0);
@@ -2377,7 +2399,11 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
} else
Callee = LoadValue;
}
+ }
+ // T9 should contain the address of the callee function if
+ // -relocation-model=pic or it is an indirect call.
+ if (IsPICCall || !GlobalOrExternal) {
// copy to T9
unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9;
Chain = DAG.getCopyToReg(Chain, dl, T9Reg, Callee, SDValue(0, 0));
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index f2b64e3..81d093f 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -40,13 +40,6 @@ namespace llvm {
// Handle gp_rel (small data/bss sections) relocation.
GPRel,
- // General Dynamic TLS
- TlsGd,
-
- // Local Exec TLS
- TprelHi,
- TprelLo,
-
// Thread Pointer
ThreadPointer,
@@ -79,7 +72,7 @@ namespace llvm {
BuildPairF64,
ExtractElementF64,
- WrapperPIC,
+ Wrapper,
DynAlloc,
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index e1725fa..21a1862 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -115,7 +115,7 @@ class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
let Inst{15-0} = imm16;
}
-class CBranchBase<bits<6> op, dag outs, dag ins, string asmstr,
+class BranchBase<bits<6> op, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
MipsInst<outs, ins, asmstr, pattern, itin, FrmI>
{
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 5358dc0..ea101f7 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -29,8 +29,8 @@ using namespace llvm;
MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm)
: MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
TM(tm), IsN64(TM.getSubtarget<MipsSubtarget>().isABI_N64()),
- RI(*TM.getSubtargetImpl(), *this) {}
-
+ RI(*TM.getSubtargetImpl(), *this),
+ UncondBrOpc(TM.getRelocationModel() == Reloc::PIC_ ? Mips::B : Mips::J) {}
const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const {
return RI;
@@ -236,7 +236,8 @@ static unsigned GetAnalyzableBrOpc(unsigned Opc) {
Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ ||
Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 ||
Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 ||
- Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::J) ?
+ Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B ||
+ Opc == Mips::J) ?
Opc : 0;
}
@@ -320,7 +321,7 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// If there is only one terminator instruction, process it.
if (!SecondLastOpc) {
// Unconditional branch
- if (LastOpc == Mips::J) {
+ if (LastOpc == UncondBrOpc) {
TBB = LastInst->getOperand(0).getMBB();
return false;
}
@@ -337,7 +338,7 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// If second to last instruction is an unconditional branch,
// analyze it and remove the last instruction.
- if (SecondLastOpc == Mips::J) {
+ if (SecondLastOpc == UncondBrOpc) {
// Return if the last instruction cannot be removed.
if (!AllowModify)
return true;
@@ -349,7 +350,7 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// Conditional branch followed by an unconditional branch.
// The last one must be unconditional.
- if (LastOpc != Mips::J)
+ if (LastOpc != UncondBrOpc)
return true;
AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond);
@@ -391,14 +392,14 @@ InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
// Two-way Conditional branch.
if (FBB) {
BuildCondBr(MBB, TBB, DL, Cond);
- BuildMI(&MBB, DL, get(Mips::J)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(UncondBrOpc)).addMBB(FBB);
return 2;
}
// One way branch.
// Unconditional branch.
if (Cond.empty())
- BuildMI(&MBB, DL, get(Mips::J)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(UncondBrOpc)).addMBB(TBB);
else // Conditional branch.
BuildCondBr(MBB, TBB, DL, Cond);
return 1;
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 8fa3052..70cc2cf 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -34,6 +34,7 @@ class MipsInstrInfo : public MipsGenInstrInfo {
MipsTargetMachine &TM;
bool IsN64;
const MipsRegisterInfo RI;
+ unsigned UncondBrOpc;
public:
explicit MipsInstrInfo(MipsTargetMachine &TM);
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 0ae94ab..9fcc5fd 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -107,7 +107,7 @@ def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem,
// movn %got(d)($gp), %got(c)($gp), $4
// This instruction is illegal since movn can take only register operands.
-def MipsWrapperPIC : SDNode<"MipsISD::WrapperPIC", SDTIntUnaryOp>;
+def MipsWrapper : SDNode<"MipsISD::Wrapper", SDTIntUnaryOp>;
// Pointer to dynamically allocated stack area.
def MipsDynAlloc : SDNode<"MipsISD::DynAlloc", SDT_MipsDynAlloc,
@@ -132,6 +132,8 @@ def NotMips64 : Predicate<"!Subtarget.hasMips64()">;
def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">;
def IsN64 : Predicate<"Subtarget.isABI_N64()">;
def NotN64 : Predicate<"!Subtarget.isABI_N64()">;
+def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
+def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">;
//===----------------------------------------------------------------------===//
// Mips Operand, Complex Patterns and Transformations Definitions.
@@ -194,12 +196,12 @@ def size_ins : Operand<i32> {
// Transformation Function - get the lower 16 bits.
def LO16 : SDNodeXForm<imm, [{
- return getI32Imm((unsigned)N->getZExtValue() & 0xFFFF);
+ return getImm(N, N->getZExtValue() & 0xFFFF);
}]>;
// Transformation Function - get the higher 16 bits.
def HI16 : SDNodeXForm<imm, [{
- return getI32Imm((unsigned)N->getZExtValue() >> 16);
+ return getImm(N, (N->getZExtValue() >> 16) & 0xFFFF);
}]>;
// Node immediate fits as 16-bit sign extended on target immediate.
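
The LO16/HI16 transforms above now produce a constant of the node's own type instead of a hard-coded i32. A worked example of the split they compute, assuming the usual lui/ori materialization of a 32-bit immediate:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Imm = 0x12345678;
      uint32_t Hi  = (Imm >> 16) & 0xFFFF;   // HI16
      uint32_t Lo  = Imm & 0xFFFF;           // LO16
      assert(((Hi << 16) | Lo) == Imm);      // lui Hi ; ori Lo reassembles Imm
      return 0;
    }
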
@@ -380,21 +382,13 @@ class StoreM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC,
let isPseudo = Pseudo;
}
-// Memory Load/Store
+// Unaligned Memory Load/Store
let canFoldAsLoad = 1 in
-class LoadX<bits<6> op, RegisterClass RC,
- Operand MemOpnd>:
- FMem<op, (outs RC:$rt), (ins MemOpnd:$addr),
- "",
- [], IILoad> {
-}
+class LoadUnAlign<bits<6> op, RegisterClass RC, Operand MemOpnd>:
+ FMem<op, (outs RC:$rt), (ins MemOpnd:$addr), "", [], IILoad> {}
-class StoreX<bits<6> op, RegisterClass RC,
- Operand MemOpnd>:
- FMem<op, (outs), (ins RC:$rt, MemOpnd:$addr),
- "",
- [], IIStore> {
-}
+class StoreUnAlign<bits<6> op, RegisterClass RC, Operand MemOpnd>:
+ FMem<op, (outs), (ins RC:$rt, MemOpnd:$addr), "", [], IIStore> {}
// 32-bit load.
multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode,
@@ -415,10 +409,10 @@ multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode,
}
// 32-bit load.
-multiclass LoadX32<bits<6> op> {
- def #NAME# : LoadX<op, CPURegs, mem>,
+multiclass LoadUnAlign32<bits<6> op> {
+ def #NAME# : LoadUnAlign<op, CPURegs, mem>,
Requires<[NotN64]>;
- def _P8 : LoadX<op, CPURegs, mem64>,
+ def _P8 : LoadUnAlign<op, CPURegs, mem64>,
Requires<[IsN64]>;
}
// 32-bit store.
@@ -440,18 +434,18 @@ multiclass StoreM64<bits<6> op, string instr_asm, PatFrag OpNode,
}
// 32-bit store.
-multiclass StoreX32<bits<6> op> {
- def #NAME# : StoreX<op, CPURegs, mem>,
+multiclass StoreUnAlign32<bits<6> op> {
+ def #NAME# : StoreUnAlign<op, CPURegs, mem>,
Requires<[NotN64]>;
- def _P8 : StoreX<op, CPURegs, mem64>,
+ def _P8 : StoreUnAlign<op, CPURegs, mem64>,
Requires<[IsN64]>;
}
// Conditional Branch
class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>:
- CBranchBase<op, (outs), (ins RC:$rs, RC:$rt, brtarget:$imm16),
- !strconcat(instr_asm, "\t$rs, $rt, $imm16"),
- [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$imm16)], IIBranch> {
+ BranchBase<op, (outs), (ins RC:$rs, RC:$rt, brtarget:$imm16),
+ !strconcat(instr_asm, "\t$rs, $rt, $imm16"),
+ [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$imm16)], IIBranch> {
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 1;
@@ -459,9 +453,9 @@ class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>:
class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op,
RegisterClass RC>:
- CBranchBase<op, (outs), (ins RC:$rs, brtarget:$imm16),
- !strconcat(instr_asm, "\t$rs, $imm16"),
- [(brcond (i32 (cond_op RC:$rs, 0)), bb:$imm16)], IIBranch> {
+ BranchBase<op, (outs), (ins RC:$rs, brtarget:$imm16),
+ !strconcat(instr_asm, "\t$rs, $imm16"),
+ [(brcond (i32 (cond_op RC:$rs, 0)), bb:$imm16)], IIBranch> {
let rt = _rt;
let isBranch = 1;
let isTerminator = 1;
@@ -485,11 +479,29 @@ class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op, Operand Od,
[(set CPURegs:$rt, (cond_op RC:$rs, imm_type:$imm16))],
IIAlu>;
-// Unconditional branch
-let isBranch=1, isTerminator=1, isBarrier=1, hasDelaySlot = 1 in
+// Jump
class JumpFJ<bits<6> op, string instr_asm>:
FJ<op, (outs), (ins jmptarget:$target),
- !strconcat(instr_asm, "\t$target"), [(br bb:$target)], IIBranch>;
+ !strconcat(instr_asm, "\t$target"), [(br bb:$target)], IIBranch> {
+ let isBranch=1;
+ let isTerminator=1;
+ let isBarrier=1;
+ let hasDelaySlot = 1;
+ let Predicates = [RelocStatic];
+}
+
+// Unconditional branch
+class UncondBranch<bits<6> op, string instr_asm>:
+ BranchBase<op, (outs), (ins brtarget:$imm16),
+ !strconcat(instr_asm, "\t$imm16"), [(br bb:$imm16)], IIBranch> {
+ let rs = 0;
+ let rt = 0;
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let hasDelaySlot = 1;
+ let Predicates = [RelocPIC];
+}
let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1,
isIndirectBranch = 1 in
@@ -616,21 +628,37 @@ class ByteSwap<bits<6> func, bits<5> sa, string instr_asm>:
}
// Read Hardware
-class ReadHardware: FR<0x1f, 0x3b, (outs CPURegs:$rt), (ins HWRegs:$rd),
- "rdhwr\t$rt, $rd", [], IIAlu> {
+class ReadHardware<RegisterClass CPURegClass, RegisterClass HWRegClass>
+ : FR<0x1f, 0x3b, (outs CPURegClass:$rt), (ins HWRegClass:$rd),
+ "rdhwr\t$rt, $rd", [], IIAlu> {
let rs = 0;
let shamt = 0;
}
// Ext and Ins
-class ExtIns<bits<6> _funct, string instr_asm, dag outs, dag ins,
- list<dag> pattern, InstrItinClass itin>:
- FR<0x1f, _funct, outs, ins, !strconcat(instr_asm, " $rt, $rs, $pos, $sz"),
- pattern, itin>, Requires<[HasMips32r2]> {
+class ExtBase<bits<6> _funct, string instr_asm, RegisterClass RC>:
+ FR<0x1f, _funct, (outs RC:$rt), (ins RC:$rs, uimm16:$pos, size_ext:$sz),
+ !strconcat(instr_asm, " $rt, $rs, $pos, $sz"),
+ [(set RC:$rt, (MipsExt RC:$rs, imm:$pos, imm:$sz))], NoItinerary> {
bits<5> pos;
bits<5> sz;
let rd = sz;
let shamt = pos;
+ let Predicates = [HasMips32r2];
+}
+
+class InsBase<bits<6> _funct, string instr_asm, RegisterClass RC>:
+ FR<0x1f, _funct, (outs RC:$rt),
+ (ins RC:$rs, uimm16:$pos, size_ins:$sz, RC:$src),
+ !strconcat(instr_asm, " $rt, $rs, $pos, $sz"),
+ [(set RC:$rt, (MipsIns RC:$rs, imm:$pos, imm:$sz, RC:$src))],
+ NoItinerary> {
+ bits<5> pos;
+ bits<5> sz;
+ let rd = sz;
+ let shamt = pos;
+ let Predicates = [HasMips32r2];
+ let Constraints = "$src = $rt";
}
// Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*).
@@ -795,10 +823,10 @@ defm USH : StoreM32<0x29, "ush", truncstorei16_u, 1>;
defm USW : StoreM32<0x2b, "usw", store_u, 1>;
/// Primitives for unaligned
-defm LWL : LoadX32<0x22>;
-defm LWR : LoadX32<0x26>;
-defm SWL : StoreX32<0x2A>;
-defm SWR : StoreX32<0x2E>;
+defm LWL : LoadUnAlign32<0x22>;
+defm LWR : LoadUnAlign32<0x26>;
+defm SWL : StoreUnAlign32<0x2A>;
+defm SWR : StoreUnAlign32<0x2E>;
let hasSideEffects = 1 in
def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype",
@@ -822,6 +850,7 @@ def J : JumpFJ<0x02, "j">;
def JR : JumpFR<0x00, 0x08, "jr", CPURegs>;
def JAL : JumpLink<0x03, "jal">;
def JALR : JumpLinkReg<0x00, 0x09, "jalr">;
+def B : UncondBranch<0x04, "b">;
def BEQ : CBranch<0x04, "beq", seteq, CPURegs>;
def BNE : CBranch<0x05, "bne", setne, CPURegs>;
def BGEZ : CBranchZero<0x01, 1, "bgez", setge, CPURegs>;
@@ -888,21 +917,10 @@ def MSUBU : MArithR<5, "msubu", MipsMSubu>;
def MUL : ArithLogicR<0x1c, 0x02, "mul", mul, IIImul, CPURegs, 1>,
Requires<[HasMips32]>;
-def RDHWR : ReadHardware;
-
-def EXT : ExtIns<0, "ext", (outs CPURegs:$rt),
- (ins CPURegs:$rs, uimm16:$pos, size_ext:$sz),
- [(set CPURegs:$rt,
- (MipsExt CPURegs:$rs, immZExt5:$pos, immZExt5:$sz))],
- NoItinerary>;
+def RDHWR : ReadHardware<CPURegs, HWRegs>;
-let Constraints = "$src = $rt" in
-def INS : ExtIns<4, "ins", (outs CPURegs:$rt),
- (ins CPURegs:$rs, uimm16:$pos, size_ins:$sz, CPURegs:$src),
- [(set CPURegs:$rt,
- (MipsIns CPURegs:$rs, immZExt5:$pos, immZExt5:$sz,
- CPURegs:$src))],
- NoItinerary>;
+def EXT : ExtBase<0, "ext", CPURegs>;
+def INS : InsBase<4, "ins", CPURegs>;
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
@@ -939,11 +957,13 @@ def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
def : Pat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>;
def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>;
+def : Pat<(MipsHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>;
def : Pat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>;
def : Pat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>;
def : Pat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>;
def : Pat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>;
+def : Pat<(MipsLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>;
def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)),
(ADDiu CPURegs:$hi, tglobaladdr:$lo)>;
@@ -953,6 +973,8 @@ def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)),
(ADDiu CPURegs:$hi, tjumptable:$lo)>;
def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)),
(ADDiu CPURegs:$hi, tconstpool:$lo)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tglobaltlsaddr:$lo)),
+ (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>;
// gp_rel relocs
def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)),
@@ -960,26 +982,17 @@ def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)),
def : Pat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)),
(ADDiu CPURegs:$gp, tconstpool:$in)>;
-// tlsgd
-def : Pat<(add CPURegs:$gp, (MipsTlsGd tglobaltlsaddr:$in)),
- (ADDiu CPURegs:$gp, tglobaltlsaddr:$in)>;
-
-// tprel hi/lo
-def : Pat<(MipsTprelHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>;
-def : Pat<(MipsTprelLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>;
-def : Pat<(add CPURegs:$hi, (MipsTprelLo tglobaltlsaddr:$lo)),
- (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>;
-
// wrapper_pic
-class WrapperPICPat<SDNode node>:
- Pat<(MipsWrapperPIC node:$in),
- (ADDiu GP, node:$in)>;
-
-def : WrapperPICPat<tglobaladdr>;
-def : WrapperPICPat<tconstpool>;
-def : WrapperPICPat<texternalsym>;
-def : WrapperPICPat<tblockaddress>;
-def : WrapperPICPat<tjumptable>;
+class WrapperPat<SDNode node, Instruction ADDiuOp, Register GPReg>:
+ Pat<(MipsWrapper node:$in),
+ (ADDiuOp GPReg, node:$in)>;
+
+def : WrapperPat<tglobaladdr, ADDiu, GP>;
+def : WrapperPat<tconstpool, ADDiu, GP>;
+def : WrapperPat<texternalsym, ADDiu, GP>;
+def : WrapperPat<tblockaddress, ADDiu, GP>;
+def : WrapperPat<tjumptable, ADDiu, GP>;
+def : WrapperPat<tglobaltlsaddr, ADDiu, GP>;
// Mips does not have "not", so we expand our way
def : Pat<(not CPURegs:$in),
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index 6fc2af1..23486d3 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -41,10 +41,14 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case MipsII::MO_NO_FLAG: Kind = MCSymbolRefExpr::VK_None; break;
case MipsII::MO_GPREL: Kind = MCSymbolRefExpr::VK_Mips_GPREL; break;
case MipsII::MO_GOT_CALL: Kind = MCSymbolRefExpr::VK_Mips_GOT_CALL; break;
+ case MipsII::MO_GOT16: Kind = MCSymbolRefExpr::VK_Mips_GOT16; break;
case MipsII::MO_GOT: Kind = MCSymbolRefExpr::VK_Mips_GOT; break;
case MipsII::MO_ABS_HI: Kind = MCSymbolRefExpr::VK_Mips_ABS_HI; break;
case MipsII::MO_ABS_LO: Kind = MCSymbolRefExpr::VK_Mips_ABS_LO; break;
case MipsII::MO_TLSGD: Kind = MCSymbolRefExpr::VK_Mips_TLSGD; break;
+ case MipsII::MO_TLSLDM: Kind = MCSymbolRefExpr::VK_Mips_TLSLDM; break;
+ case MipsII::MO_DTPREL_HI:Kind = MCSymbolRefExpr::VK_Mips_DTPREL_HI; break;
+ case MipsII::MO_DTPREL_LO:Kind = MCSymbolRefExpr::VK_Mips_DTPREL_LO; break;
case MipsII::MO_GOTTPREL: Kind = MCSymbolRefExpr::VK_Mips_GOTTPREL; break;
case MipsII::MO_TPREL_HI: Kind = MCSymbolRefExpr::VK_Mips_TPREL_HI; break;
case MipsII::MO_TPREL_LO: Kind = MCSymbolRefExpr::VK_Mips_TPREL_LO; break;
@@ -136,14 +140,35 @@ void MipsMCInstLower::LowerCPLOAD(const MachineInstr *MI,
}
// Lower ".cprestore offset" to "sw $gp, offset($sp)".
-void MipsMCInstLower::LowerCPRESTORE(const MachineInstr *MI, MCInst &OutMI) {
- OutMI.clear();
- OutMI.setOpcode(Mips::SW);
- OutMI.addOperand(MCOperand::CreateReg(Mips::GP));
- OutMI.addOperand(MCOperand::CreateReg(Mips::SP));
+void MipsMCInstLower::LowerCPRESTORE(const MachineInstr *MI,
+ SmallVector<MCInst, 4>& MCInsts) {
const MachineOperand &MO = MI->getOperand(0);
assert(MO.isImm() && "CPRESTORE's operand must be an immediate.");
- OutMI.addOperand(MCOperand::CreateImm(MO.getImm()));
+ unsigned Offset = MO.getImm(), Reg = Mips::SP;
+ MCInst Sw;
+
+ if (Offset >= 0x8000) {
+ unsigned Hi = (Offset >> 16) + ((Offset & 0x8000) != 0);
+ Offset &= 0xffff;
+ Reg = Mips::AT;
+
+ // lui at,hi
+ // addu at,at,sp
+ MCInsts.resize(2);
+ MCInsts[0].setOpcode(Mips::LUi);
+ MCInsts[0].addOperand(MCOperand::CreateReg(Mips::AT));
+ MCInsts[0].addOperand(MCOperand::CreateImm(Hi));
+ MCInsts[1].setOpcode(Mips::ADDu);
+ MCInsts[1].addOperand(MCOperand::CreateReg(Mips::AT));
+ MCInsts[1].addOperand(MCOperand::CreateReg(Mips::AT));
+ MCInsts[1].addOperand(MCOperand::CreateReg(Mips::SP));
+ }
+
+ Sw.setOpcode(Mips::SW);
+ Sw.addOperand(MCOperand::CreateReg(Mips::GP));
+ Sw.addOperand(MCOperand::CreateReg(Reg));
+ Sw.addOperand(MCOperand::CreateImm(Offset));
+ MCInsts.push_back(Sw);
}
MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO,
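
For large frames the expansion above cannot encode the offset in sw's signed 16-bit field, so it first materializes sp + offset in $at with lui/addu. A sketch of the carry-adjusted split it uses, with a made-up offset (illustrative, not LLVM code):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Offset = 0x18000;                               // hypothetical offset >= 0x8000
      uint32_t Hi = (Offset >> 16) + ((Offset & 0x8000) != 0); // carry-adjusted high part
      int32_t  Lo = (int16_t)(Offset & 0xffff);                // sign-extended low part
      // lui at, Hi ; addu at, at, sp ; sw gp, Lo(at)  ==  sw gp, Offset(sp)
      assert((uint32_t)((Hi << 16) + Lo) == Offset);
      return 0;
    }
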
diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h
index 98e37e4..1490c14 100644
--- a/lib/Target/Mips/MipsMCInstLower.h
+++ b/lib/Target/Mips/MipsMCInstLower.h
@@ -36,7 +36,7 @@ public:
MipsAsmPrinter &asmprinter);
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
void LowerCPLOAD(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts);
- void LowerCPRESTORE(const MachineInstr *MI, MCInst &OutMI);
+ void LowerCPRESTORE(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts);
void LowerUnalignedLoadStore(const MachineInstr *MI,
SmallVector<MCInst, 4>& MCInsts);
private:
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 06c4a66..e5a0f08 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -125,6 +125,7 @@ getRegisterNumbering(unsigned RegEnum)
case Mips::D14:
return 28;
case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64:
+ case Mips::HWR29:
return 29;
case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64:
case Mips::D15:
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 925ad9e..76ee2e6 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -239,6 +239,7 @@ let Namespace = "Mips" in {
// Hardware register $29
def HWR29 : Register<"29">;
+ def HWR29_64 : Register<"29">;
}
//===----------------------------------------------------------------------===//
@@ -301,3 +302,5 @@ def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)> {
// Hardware registers
def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>;
+def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>;
+
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 016d449..dc299f2 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -31,7 +31,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
{
std::string CPUName = CPU;
if (CPUName.empty())
- CPUName = "mips32r1";
+ CPUName = "mips32";
// Parse features string.
ParseSubtargetFeatures(CPUName, FS);
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 5d6b24f..02887fa 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -34,51 +34,51 @@ extern "C" void LLVMInitializeMipsTarget() {
// Using CodeModel::Large enables different CALL behavior.
MipsTargetMachine::
MipsTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
- bool isLittle):
- LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
- Subtarget(TT, CPU, FS, isLittle),
- DataLayout(isLittle ?
- (Subtarget.isABI_N64() ?
- "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
- "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") :
- (Subtarget.isABI_N64() ?
- "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
- "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")),
- InstrInfo(*this),
- FrameLowering(Subtarget),
- TLInfo(*this), TSInfo(*this), JITInfo() {
+ bool isLittle)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, isLittle),
+ DataLayout(isLittle ?
+ (Subtarget.isABI_N64() ?
+ "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
+ "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") :
+ (Subtarget.isABI_N64() ?
+ "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
+ "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")),
+ InstrInfo(*this),
+ FrameLowering(Subtarget),
+ TLInfo(*this), TSInfo(*this), JITInfo() {
}
MipsebTargetMachine::
MipsebTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL) :
- MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {}
+ CodeGenOpt::Level OL)
+ : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
MipselTargetMachine::
MipselTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL) :
- MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {}
+ CodeGenOpt::Level OL)
+ : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
Mips64ebTargetMachine::
Mips64ebTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL) :
- MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {}
+ CodeGenOpt::Level OL)
+ : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
Mips64elTargetMachine::
Mips64elTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL) :
- MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {}
+ CodeGenOpt::Level OL)
+ : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
// Install an instruction selector pass using
// the ISelDag to gen Mips code.
@@ -120,4 +120,3 @@ bool MipsTargetMachine::addCodeEmitter(PassManagerBase &PM,
PM.add(createMipsJITCodeEmitterPass(*this, JCE));
return false;
}
-
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index e40d9e2..6842373 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -38,7 +38,7 @@ namespace llvm {
public:
MipsTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool isLittle);
@@ -82,7 +82,7 @@ namespace llvm {
class MipsebTargetMachine : public MipsTargetMachine {
public:
MipsebTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
@@ -92,7 +92,7 @@ public:
class MipselTargetMachine : public MipsTargetMachine {
public:
MipselTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
@@ -103,6 +103,7 @@ class Mips64ebTargetMachine : public MipsTargetMachine {
public:
Mips64ebTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
@@ -113,6 +114,7 @@ class Mips64elTargetMachine : public MipsTargetMachine {
public:
Mips64elTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
diff --git a/lib/Target/Mips/TargetInfo/CMakeLists.txt b/lib/Target/Mips/TargetInfo/CMakeLists.txt
index 5692604..4172d00 100644
--- a/lib/Target/Mips/TargetInfo/CMakeLists.txt
+++ b/lib/Target/Mips/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMMipsInfo
MipsTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMMipsInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMMipsInfo MipsCommonTableGen)
diff --git a/lib/Target/Mips/TargetInfo/LLVMBuild.txt b/lib/Target/Mips/TargetInfo/LLVMBuild.txt
index 90ae260..2d42568 100644
--- a/lib/Target/Mips/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/Mips/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MipsInfo
parent = Mips
required_libraries = MC Support Target
add_to_library_groups = Mips
-
diff --git a/lib/Target/PTX/CMakeLists.txt b/lib/Target/PTX/CMakeLists.txt
index 6709c1b..a9f4330 100644
--- a/lib/Target/PTX/CMakeLists.txt
+++ b/lib/Target/PTX/CMakeLists.txt
@@ -25,20 +25,6 @@ add_llvm_target(PTXCodeGen
PTXTargetMachine.cpp
)
-add_llvm_library_dependencies(LLVMPTXCodeGen
- LLVMAnalysis
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMPTXDesc
- LLVMPTXInfo
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- )
-
add_subdirectory(TargetInfo)
add_subdirectory(InstPrinter)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/PTX/InstPrinter/CMakeLists.txt b/lib/Target/PTX/InstPrinter/CMakeLists.txt
index 029d060..b252893 100644
--- a/lib/Target/PTX/InstPrinter/CMakeLists.txt
+++ b/lib/Target/PTX/InstPrinter/CMakeLists.txt
@@ -6,8 +6,3 @@ add_llvm_library(LLVMPTXAsmPrinter
add_dependencies(LLVMPTXAsmPrinter PTXCommonTableGen)
-add_llvm_library_dependencies(LLVMPTXAsmPrinter
- LLVMMC
- LLVMSupport
- )
-
diff --git a/lib/Target/PTX/InstPrinter/LLVMBuild.txt b/lib/Target/PTX/InstPrinter/LLVMBuild.txt
index be89c10..af5d200 100644
--- a/lib/Target/PTX/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/PTX/InstPrinter/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = PTXAsmPrinter
parent = PTX
required_libraries = MC Support
add_to_library_groups = PTX
-
diff --git a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp
index 2f6c92d..5fecb85 100644
--- a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp
+++ b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp
@@ -38,7 +38,50 @@ StringRef PTXInstPrinter::getOpcodeName(unsigned Opcode) const {
}
void PTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- OS << getRegisterName(RegNo);
+ // Decode the register number into type and offset
+ unsigned RegSpace = RegNo & 0x7;
+ unsigned RegType = (RegNo >> 3) & 0x7;
+ unsigned RegOffset = RegNo >> 6;
+
+ // Print the register
+ OS << "%";
+
+ switch (RegSpace) {
+ default:
+ llvm_unreachable("Unknown register space!");
+ case PTXRegisterSpace::Reg:
+ switch (RegType) {
+ default:
+ llvm_unreachable("Unknown register type!");
+ case PTXRegisterType::Pred:
+ OS << "p";
+ break;
+ case PTXRegisterType::B16:
+ OS << "rh";
+ break;
+ case PTXRegisterType::B32:
+ OS << "r";
+ break;
+ case PTXRegisterType::B64:
+ OS << "rd";
+ break;
+ case PTXRegisterType::F32:
+ OS << "f";
+ break;
+ case PTXRegisterType::F64:
+ OS << "fd";
+ break;
+ }
+ break;
+ case PTXRegisterSpace::Return:
+ OS << "ret";
+ break;
+ case PTXRegisterSpace::Argument:
+ OS << "arg";
+ break;
+ }
+
+ OS << RegOffset;
}
void PTXInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
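
printRegName now decodes a packed register number rather than printing a static name: bits [2:0] carry the register space, bits [5:3] the register type, and the remaining bits the per-class offset. A sketch of the matching encode step; the encode helper is hypothetical, only the decode layout comes from this patch:

    #include <cassert>

    static unsigned encodeReg(unsigned Space, unsigned Type, unsigned Offset) {
      return (Offset << 6) | (Type << 3) | Space;   // same layout the printer decodes
    }

    int main() {
      // Space Reg (0), type B32 (2), offset 7 -> printed as "%r7".
      unsigned R = encodeReg(0, 2, 7);
      assert((R & 0x7) == 0 && ((R >> 3) & 0x7) == 2 && (R >> 6) == 7);
      return 0;
    }
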
@@ -139,6 +182,8 @@ void PTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
} else {
O << "0000000000000000";
}
+ } else if (Op.isReg()) {
+ printRegName(O, Op.getReg());
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
const MCExpr *Expr = Op.getExpr();
diff --git a/lib/Target/PTX/LLVMBuild.txt b/lib/Target/PTX/LLVMBuild.txt
index 22c70de..15a1eb5 100644
--- a/lib/Target/PTX/LLVMBuild.txt
+++ b/lib/Target/PTX/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = InstPrinter MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = PTX
@@ -27,4 +30,3 @@ name = PTXCodeGen
parent = PTX
required_libraries = Analysis AsmPrinter CodeGen Core MC PTXDesc PTXInfo SelectionDAG Support Target TransformUtils
add_to_library_groups = PTX
-
diff --git a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
index 94dbcee..d1fd74c 100644
--- a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
@@ -3,11 +3,4 @@ add_llvm_library(LLVMPTXDesc
PTXMCAsmInfo.cpp
)
-add_llvm_library_dependencies(LLVMPTXDesc
- LLVMMC
- LLVMPTXAsmPrinter
- LLVMPTXInfo
- LLVMSupport
- )
-
add_dependencies(LLVMPTXDesc PTXCommonTableGen)
diff --git a/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt b/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt
index fff21c1..19b80c5 100644
--- a/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = PTXDesc
parent = PTX
required_libraries = MC PTXAsmPrinter PTXInfo Support
add_to_library_groups = PTX
-
diff --git a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
index c6094be..77a298d 100644
--- a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
+++ b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
@@ -17,6 +17,8 @@
#ifndef PTXBASEINFO_H
#define PTXBASEINFO_H
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "PTXMCTargetDesc.h"
namespace llvm {
@@ -57,6 +59,75 @@ namespace llvm {
RndPosInfInt = 10 // .rpi
};
} // namespace PTXII
+
+ namespace PTXRegisterType {
+ // Register type encoded in MCOperands
+ enum {
+ Pred = 0,
+ B16,
+ B32,
+ B64,
+ F32,
+ F64
+ };
+ } // namespace PTXRegisterType
+
+ namespace PTXRegisterSpace {
+ // Register space encoded in MCOperands
+ enum {
+ Reg = 0,
+ Local,
+ Param,
+ Argument,
+ Return
+ };
+ }
+
+ inline static void decodeRegisterName(raw_ostream &OS,
+ unsigned EncodedReg) {
+ OS << "%";
+
+ unsigned RegSpace = EncodedReg & 0x7;
+ unsigned RegType = (EncodedReg >> 3) & 0x7;
+ unsigned RegOffset = EncodedReg >> 6;
+
+ switch (RegSpace) {
+ default:
+ llvm_unreachable("Unknown register space!");
+ case PTXRegisterSpace::Reg:
+ switch (RegType) {
+ default:
+ llvm_unreachable("Unknown register type!");
+ case PTXRegisterType::Pred:
+ OS << "p";
+ break;
+ case PTXRegisterType::B16:
+ OS << "rh";
+ break;
+ case PTXRegisterType::B32:
+ OS << "r";
+ break;
+ case PTXRegisterType::B64:
+ OS << "rd";
+ break;
+ case PTXRegisterType::F32:
+ OS << "f";
+ break;
+ case PTXRegisterType::F64:
+ OS << "fd";
+ break;
+ }
+ break;
+ case PTXRegisterSpace::Return:
+ OS << "ret";
+ break;
+ case PTXRegisterSpace::Argument:
+ OS << "arg";
+ break;
+ }
+
+ OS << RegOffset;
+ }
} // namespace llvm
#endif
diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp
index bdf238b..77ed71d 100644
--- a/lib/Target/PTX/PTXAsmPrinter.cpp
+++ b/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -51,23 +51,23 @@ using namespace llvm;
static const char PARAM_PREFIX[] = "__param_";
static const char RETURN_PREFIX[] = "__ret_";
-static const char *getRegisterTypeName(unsigned RegNo,
- const MachineRegisterInfo& MRI) {
- const TargetRegisterClass *TRC = MRI.getRegClass(RegNo);
-
-#define TEST_REGCLS(cls, clsstr) \
- if (PTX::cls ## RegisterClass == TRC) return # clsstr;
-
- TEST_REGCLS(RegPred, pred);
- TEST_REGCLS(RegI16, b16);
- TEST_REGCLS(RegI32, b32);
- TEST_REGCLS(RegI64, b64);
- TEST_REGCLS(RegF32, b32);
- TEST_REGCLS(RegF64, b64);
-#undef TEST_REGCLS
-
- llvm_unreachable("Not in any register class!");
- return NULL;
+static const char *getRegisterTypeName(unsigned RegType) {
+ switch (RegType) {
+ default:
+ llvm_unreachable("Unknown register type");
+ case PTXRegisterType::Pred:
+ return ".pred";
+ case PTXRegisterType::B16:
+ return ".b16";
+ case PTXRegisterType::B32:
+ return ".b32";
+ case PTXRegisterType::B64:
+ return ".b64";
+ case PTXRegisterType::F32:
+ return ".f32";
+ case PTXRegisterType::F64:
+ return ".f64";
+ }
}
static const char *getStateSpaceName(unsigned addressSpace) {
@@ -188,32 +188,32 @@ void PTXAsmPrinter::EmitFunctionBodyStart() {
unsigned numRegs;
// pred
- numRegs = MFI->getNumRegistersForClass(PTX::RegPredRegisterClass);
+ numRegs = MFI->countRegisters(PTXRegisterType::Pred, PTXRegisterSpace::Reg);
if(numRegs > 0)
os << "\t.reg .pred %p<" << numRegs << ">;\n";
// i16
- numRegs = MFI->getNumRegistersForClass(PTX::RegI16RegisterClass);
+ numRegs = MFI->countRegisters(PTXRegisterType::B16, PTXRegisterSpace::Reg);
if(numRegs > 0)
os << "\t.reg .b16 %rh<" << numRegs << ">;\n";
// i32
- numRegs = MFI->getNumRegistersForClass(PTX::RegI32RegisterClass);
+ numRegs = MFI->countRegisters(PTXRegisterType::B32, PTXRegisterSpace::Reg);
if(numRegs > 0)
os << "\t.reg .b32 %r<" << numRegs << ">;\n";
// i64
- numRegs = MFI->getNumRegistersForClass(PTX::RegI64RegisterClass);
+ numRegs = MFI->countRegisters(PTXRegisterType::B64, PTXRegisterSpace::Reg);
if(numRegs > 0)
os << "\t.reg .b64 %rd<" << numRegs << ">;\n";
// f32
- numRegs = MFI->getNumRegistersForClass(PTX::RegF32RegisterClass);
+ numRegs = MFI->countRegisters(PTXRegisterType::F32, PTXRegisterSpace::Reg);
if(numRegs > 0)
os << "\t.reg .f32 %f<" << numRegs << ">;\n";
// f64
- numRegs = MFI->getNumRegistersForClass(PTX::RegF64RegisterClass);
+ numRegs = MFI->countRegisters(PTXRegisterType::F64, PTXRegisterSpace::Reg);
if(numRegs > 0)
os << "\t.reg .f64 %fd<" << numRegs << ">;\n";
@@ -368,7 +368,6 @@ void PTXAsmPrinter::EmitFunctionEntryLabel() {
const PTXParamManager &PM = MFI->getParamManager();
const bool isKernel = MFI->isKernel();
const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
- const MachineRegisterInfo& MRI = MF->getRegInfo();
SmallString<128> decl;
raw_svector_ostream os(decl);
@@ -391,7 +390,7 @@ void PTXAsmPrinter::EmitFunctionEntryLabel() {
if (i != b)
os << ", ";
- os << ".reg ." << getRegisterTypeName(*i, MRI) << ' '
+ os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' '
<< MFI->getRegisterName(*i);
}
}
@@ -450,7 +449,7 @@ void PTXAsmPrinter::EmitFunctionEntryLabel() {
if (i != b)
os << ", ";
- os << ".reg ." << getRegisterTypeName(*i, MRI) << ' '
+ os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' '
<< MFI->getRegisterName(*i);
}
}
@@ -521,20 +520,18 @@ MCOperand PTXAsmPrinter::GetSymbolRef(const MachineOperand &MO,
MCOperand PTXAsmPrinter::lowerOperand(const MachineOperand &MO) {
MCOperand MCOp;
const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
- const MCExpr *Expr;
- const char *RegSymbolName;
+ unsigned EncodedReg;
switch (MO.getType()) {
default:
llvm_unreachable("Unknown operand type");
case MachineOperand::MO_Register:
- // We create register operands as symbols, since the PTXInstPrinter class
- // has no way to map virtual registers back to a name without some ugly
- // hacks.
- // FIXME: Figure out a better way to handle virtual register naming.
- RegSymbolName = MFI->getRegisterName(MO.getReg());
- Expr = MCSymbolRefExpr::Create(RegSymbolName, MCSymbolRefExpr::VK_None,
- OutContext);
- MCOp = MCOperand::CreateExpr(Expr);
+ if (MO.getReg() > 0) {
+ // Encode the register
+ EncodedReg = MFI->getEncodedRegister(MO.getReg());
+ } else {
+ EncodedReg = 0;
+ }
+ MCOp = MCOperand::CreateReg(EncodedReg);
break;
case MachineOperand::MO_Immediate:
MCOp = MCOperand::CreateImm(MO.getImm());
diff --git a/lib/Target/PTX/PTXFPRoundingModePass.cpp b/lib/Target/PTX/PTXFPRoundingModePass.cpp
index 0b653e0..a21d172 100644
--- a/lib/Target/PTX/PTXFPRoundingModePass.cpp
+++ b/lib/Target/PTX/PTXFPRoundingModePass.cpp
@@ -23,9 +23,11 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
// NOTE: PTXFPRoundingModePass should be executed just before emission.
-namespace llvm {
+namespace {
/// PTXFPRoundingModePass - Pass to assign appropriate FP rounding modes to
/// all FP instructions. Essentially, this pass just looks for all FP
/// instructions that have a rounding mode set to RndDefault, and sets an
@@ -58,7 +60,7 @@ namespace llvm {
void initializeMap();
void processInstruction(MachineInstr &MI);
}; // class PTXFPRoundingModePass
-} // namespace llvm
+} // end anonymous namespace
using namespace llvm;
diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp
index 17191fb..a012297 100644
--- a/lib/Target/PTX/PTXISelLowering.cpp
+++ b/lib/Target/PTX/PTXISelLowering.cpp
@@ -243,6 +243,30 @@ SDValue PTXTargetLowering::
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
EVT RegVT = Ins[i].VT;
TargetRegisterClass* TRC = getRegClassFor(RegVT);
+ unsigned RegType;
+
+ // Determine which register class we need
+ if (RegVT == MVT::i1) {
+ RegType = PTXRegisterType::Pred;
+ }
+ else if (RegVT == MVT::i16) {
+ RegType = PTXRegisterType::B16;
+ }
+ else if (RegVT == MVT::i32) {
+ RegType = PTXRegisterType::B32;
+ }
+ else if (RegVT == MVT::i64) {
+ RegType = PTXRegisterType::B64;
+ }
+ else if (RegVT == MVT::f32) {
+ RegType = PTXRegisterType::F32;
+ }
+ else if (RegVT == MVT::f64) {
+ RegType = PTXRegisterType::F64;
+ }
+ else {
+ llvm_unreachable("Unknown parameter type");
+ }
// Use a unique index in the instruction to prevent instruction folding.
// Yes, this is a hack.
@@ -253,7 +277,7 @@ SDValue PTXTargetLowering::
InVals.push_back(ArgValue);
- MFI->addArgReg(Reg);
+ MFI->addRegister(Reg, RegType, PTXRegisterSpace::Argument);
}
}
@@ -304,25 +328,32 @@ SDValue PTXTargetLowering::
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
EVT RegVT = Outs[i].VT;
TargetRegisterClass* TRC = 0;
+ unsigned RegType;
// Determine which register class we need
if (RegVT == MVT::i1) {
TRC = PTX::RegPredRegisterClass;
+ RegType = PTXRegisterType::Pred;
}
else if (RegVT == MVT::i16) {
TRC = PTX::RegI16RegisterClass;
+ RegType = PTXRegisterType::B16;
}
else if (RegVT == MVT::i32) {
TRC = PTX::RegI32RegisterClass;
+ RegType = PTXRegisterType::B32;
}
else if (RegVT == MVT::i64) {
TRC = PTX::RegI64RegisterClass;
+ RegType = PTXRegisterType::B64;
}
else if (RegVT == MVT::f32) {
TRC = PTX::RegF32RegisterClass;
+ RegType = PTXRegisterType::F32;
}
else if (RegVT == MVT::f64) {
TRC = PTX::RegF64RegisterClass;
+ RegType = PTXRegisterType::F64;
}
else {
llvm_unreachable("Unknown parameter type");
@@ -335,7 +366,7 @@ SDValue PTXTargetLowering::
Chain = DAG.getNode(PTXISD::WRITE_PARAM, dl, MVT::Other, Copy, OutReg);
- MFI->addRetReg(Reg);
+ MFI->addRegister(Reg, RegType, PTXRegisterSpace::Return);
}
}
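
The same RegVT-to-register-type mapping appears in both the formal-argument and return-value paths of this file. A hypothetical helper capturing it is sketched below; the function name is illustrative and not part of the patch, and the sketch assumes the LLVM and PTX headers already included by PTXISelLowering.cpp.

// Hypothetical helper mirroring the RegVT -> PTXRegisterType chains above.
static unsigned getPTXRegisterTypeForVT(EVT RegVT) {
  if (RegVT == MVT::i1)  return PTXRegisterType::Pred;
  if (RegVT == MVT::i16) return PTXRegisterType::B16;
  if (RegVT == MVT::i32) return PTXRegisterType::B32;
  if (RegVT == MVT::i64) return PTXRegisterType::B64;
  if (RegVT == MVT::f32) return PTXRegisterType::F32;
  if (RegVT == MVT::f64) return PTXRegisterType::F64;
  llvm_unreachable("Unknown parameter type");
}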
diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp
index 1b947a5..871b3a7 100644
--- a/lib/Target/PTX/PTXInstrInfo.cpp
+++ b/lib/Target/PTX/PTXInstrInfo.cpp
@@ -116,7 +116,7 @@ bool PTXInstrInfo::isPredicated(const MachineInstr *MI) const {
}
bool PTXInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- return !isPredicated(MI) && get(MI->getOpcode()).isTerminator();
+ return !isPredicated(MI) && MI->isTerminator();
}
bool PTXInstrInfo::
@@ -184,15 +184,13 @@ AnalyzeBranch(MachineBasicBlock &MBB,
if (MBB.empty())
return true;
- MachineBasicBlock::const_iterator iter = MBB.end();
+ MachineBasicBlock::iterator iter = MBB.end();
const MachineInstr& instLast1 = *--iter;
- const MCInstrDesc &desc1 = instLast1.getDesc();
// for special case that MBB has only 1 instruction
const bool IsSizeOne = MBB.size() == 1;
// if IsSizeOne is true, *--iter and instLast2 are invalid
// we put a dummy value in instLast2 and desc2 since they are used
const MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter;
- const MCInstrDesc &desc2 = IsSizeOne ? desc1 : instLast2.getDesc();
DEBUG(dbgs() << "\n");
DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n");
@@ -207,7 +205,7 @@ AnalyzeBranch(MachineBasicBlock &MBB,
}
// this block ends with only an unconditional branch
- if (desc1.isUnconditionalBranch() &&
+ if (instLast1.isUnconditionalBranch() &&
// when IsSizeOne is true, it "absorbs" the evaluation of instLast2
(IsSizeOne || !IsAnyKindOfBranch(instLast2))) {
DEBUG(dbgs() << "AnalyzeBranch: ends with only uncond branch\n");
@@ -217,7 +215,7 @@ AnalyzeBranch(MachineBasicBlock &MBB,
// this block ends with a conditional branch and
// it falls through to a successor block
- if (desc1.isConditionalBranch() &&
+ if (instLast1.isConditionalBranch() &&
IsAnySuccessorAlsoLayoutSuccessor(MBB)) {
DEBUG(dbgs() << "AnalyzeBranch: ends with cond branch and fall through\n");
TBB = GetBranchTarget(instLast1);
@@ -233,8 +231,8 @@ AnalyzeBranch(MachineBasicBlock &MBB,
// this block ends with a conditional branch
// followed by an unconditional branch
- if (desc2.isConditionalBranch() &&
- desc1.isUnconditionalBranch()) {
+ if (instLast2.isConditionalBranch() &&
+ instLast1.isUnconditionalBranch()) {
DEBUG(dbgs() << "AnalyzeBranch: ends with cond and uncond branch\n");
TBB = GetBranchTarget(instLast2);
FBB = GetBranchTarget(instLast1);
@@ -341,8 +339,7 @@ void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) {
}
bool PTXInstrInfo::IsAnyKindOfBranch(const MachineInstr& inst) {
- const MCInstrDesc &desc = inst.getDesc();
- return desc.isTerminator() || desc.isBranch() || desc.isIndirectBranch();
+ return inst.isTerminator() || inst.isBranch() || inst.isIndirectBranch();
}
bool PTXInstrInfo::
diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td
index bcd5bcf..19a862f 100644
--- a/lib/Target/PTX/PTXInstrInfo.td
+++ b/lib/Target/PTX/PTXInstrInfo.td
@@ -825,17 +825,17 @@ let hasSideEffects = 1 in {
///===- Parameter Passing Pseudo-Instructions -----------------------------===//
def READPARAMPRED : InstPTX<(outs RegPred:$a), (ins i32imm:$b),
- "mov.pred\t$a, %param$b", []>;
+ "mov.pred\t$a, %arg$b", []>;
def READPARAMI16 : InstPTX<(outs RegI16:$a), (ins i32imm:$b),
- "mov.b16\t$a, %param$b", []>;
+ "mov.b16\t$a, %arg$b", []>;
def READPARAMI32 : InstPTX<(outs RegI32:$a), (ins i32imm:$b),
- "mov.b32\t$a, %param$b", []>;
+ "mov.b32\t$a, %arg$b", []>;
def READPARAMI64 : InstPTX<(outs RegI64:$a), (ins i32imm:$b),
- "mov.b64\t$a, %param$b", []>;
+ "mov.b64\t$a, %arg$b", []>;
def READPARAMF32 : InstPTX<(outs RegF32:$a), (ins i32imm:$b),
- "mov.f32\t$a, %param$b", []>;
+ "mov.f32\t$a, %arg$b", []>;
def READPARAMF64 : InstPTX<(outs RegF64:$a), (ins i32imm:$b),
- "mov.f64\t$a, %param$b", []>;
+ "mov.f64\t$a, %arg$b", []>;
def WRITEPARAMPRED : InstPTX<(outs), (ins RegPred:$a), "//w", []>;
def WRITEPARAMI16 : InstPTX<(outs), (ins RegI16:$a), "//w", []>;
diff --git a/lib/Target/PTX/PTXMFInfoExtract.cpp b/lib/Target/PTX/PTXMFInfoExtract.cpp
index b33a273..26ec623 100644
--- a/lib/Target/PTX/PTXMFInfoExtract.cpp
+++ b/lib/Target/PTX/PTXMFInfoExtract.cpp
@@ -22,9 +22,11 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
// NOTE: PTXMFInfoExtract must run after register allocation!
-namespace llvm {
+namespace {
/// PTXMFInfoExtract - PTX specific code to extract of PTX machine
/// function information for PTXAsmPrinter
///
@@ -42,7 +44,7 @@ namespace llvm {
return "PTX Machine Function Info Extractor";
}
}; // class PTXMFInfoExtract
-} // namespace llvm
+} // end anonymous namespace
using namespace llvm;
@@ -56,7 +58,20 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
for (unsigned i = 0; i < MRI.getNumVirtRegs(); ++i) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
- MFI->addVirtualRegister(TRC, Reg);
+ unsigned RegType;
+ if (TRC == PTX::RegPredRegisterClass)
+ RegType = PTXRegisterType::Pred;
+ else if (TRC == PTX::RegI16RegisterClass)
+ RegType = PTXRegisterType::B16;
+ else if (TRC == PTX::RegI32RegisterClass)
+ RegType = PTXRegisterType::B32;
+ else if (TRC == PTX::RegI64RegisterClass)
+ RegType = PTXRegisterType::B64;
+ else if (TRC == PTX::RegF32RegisterClass)
+ RegType = PTXRegisterType::F32;
+ else if (TRC == PTX::RegF64RegisterClass)
+ RegType = PTXRegisterType::F64;
+ MFI->addRegister(Reg, RegType, PTXRegisterSpace::Reg);
}
return false;
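
Note that the register-class chain in the hunk above has no final else, so RegType would be read uninitialized if a virtual register ever lands in a class other than the six listed. A hypothetical helper with an explicit failure branch is sketched below; it is a suggestion for illustration, not part of the patch, and assumes the headers already included by PTXMFInfoExtract.cpp.

// Hypothetical variant of the chain above with a defensive failure branch.
static unsigned getPTXRegisterTypeForClass(const TargetRegisterClass *TRC) {
  if (TRC == PTX::RegPredRegisterClass) return PTXRegisterType::Pred;
  if (TRC == PTX::RegI16RegisterClass)  return PTXRegisterType::B16;
  if (TRC == PTX::RegI32RegisterClass)  return PTXRegisterType::B32;
  if (TRC == PTX::RegI64RegisterClass)  return PTXRegisterType::B64;
  if (TRC == PTX::RegF32RegisterClass)  return PTXRegisterType::F32;
  if (TRC == PTX::RegF64RegisterClass)  return PTXRegisterType::F64;
  llvm_unreachable("Unknown register class"); // never read an unset RegType
}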
diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.h b/lib/Target/PTX/PTXMachineFunctionInfo.h
index 3b985f7..1a2878c 100644
--- a/lib/Target/PTX/PTXMachineFunctionInfo.h
+++ b/lib/Target/PTX/PTXMachineFunctionInfo.h
@@ -35,15 +35,22 @@ private:
DenseSet<unsigned> RegArgs;
DenseSet<unsigned> RegRets;
- typedef std::vector<unsigned> RegisterList;
- typedef DenseMap<const TargetRegisterClass*, RegisterList> RegisterMap;
- typedef DenseMap<unsigned, std::string> RegisterNameMap;
typedef DenseMap<int, std::string> FrameMap;
- RegisterMap UsedRegs;
- RegisterNameMap RegNames;
FrameMap FrameSymbols;
+ struct RegisterInfo {
+ unsigned Reg;
+ unsigned Type;
+ unsigned Space;
+ unsigned Offset;
+ unsigned Encoded;
+ };
+
+ typedef DenseMap<unsigned, RegisterInfo> RegisterInfoMap;
+
+ RegisterInfoMap RegInfo;
+
PTXParamManager ParamManager;
public:
@@ -51,13 +58,7 @@ public:
PTXMachineFunctionInfo(MachineFunction &MF)
: IsKernel(false) {
- UsedRegs[PTX::RegPredRegisterClass] = RegisterList();
- UsedRegs[PTX::RegI16RegisterClass] = RegisterList();
- UsedRegs[PTX::RegI32RegisterClass] = RegisterList();
- UsedRegs[PTX::RegI64RegisterClass] = RegisterList();
- UsedRegs[PTX::RegF32RegisterClass] = RegisterList();
- UsedRegs[PTX::RegF64RegisterClass] = RegisterList();
- }
+ }
/// getParamManager - Returns the PTXParamManager instance for this function.
PTXParamManager& getParamManager() { return ParamManager; }
@@ -78,69 +79,106 @@ public:
reg_iterator retreg_begin() const { return RegRets.begin(); }
reg_iterator retreg_end() const { return RegRets.end(); }
+ /// addRegister - Adds a virtual register to the set of all used registers
+ void addRegister(unsigned Reg, unsigned RegType, unsigned RegSpace) {
+ if (!RegInfo.count(Reg)) {
+ RegisterInfo Info;
+ Info.Reg = Reg;
+ Info.Type = RegType;
+ Info.Space = RegSpace;
+
+ // Determine register offset
+ Info.Offset = 0;
+ for(RegisterInfoMap::const_iterator i = RegInfo.begin(),
+ e = RegInfo.end(); i != e; ++i) {
+ const RegisterInfo& RI = i->second;
+ if (RI.Space == RegSpace)
+ if (RI.Space != PTXRegisterSpace::Reg || RI.Type == Info.Type)
+ Info.Offset++;
+ }
+
+ // Encode the register data into a single register number
+ Info.Encoded = (Info.Offset << 6) | (Info.Type << 3) | Info.Space;
+
+ RegInfo[Reg] = Info;
+
+ if (RegSpace == PTXRegisterSpace::Argument)
+ RegArgs.insert(Reg);
+ else if (RegSpace == PTXRegisterSpace::Return)
+ RegRets.insert(Reg);
+ }
+ }
+
+ /// countRegisters - Returns the number of registers of the given type and
+ /// space.
+ unsigned countRegisters(unsigned RegType, unsigned RegSpace) const {
+ unsigned Count = 0;
+ for(RegisterInfoMap::const_iterator i = RegInfo.begin(), e = RegInfo.end();
+ i != e; ++i) {
+ const RegisterInfo& RI = i->second;
+ if (RI.Type == RegType && RI.Space == RegSpace)
+ Count++;
+ }
+ return Count;
+ }
+
+ /// getEncodedRegister - Returns the encoded value of the register.
+ unsigned getEncodedRegister(unsigned Reg) const {
+ return RegInfo.lookup(Reg).Encoded;
+ }
+
/// addRetReg - Adds a register to the set of return-value registers.
void addRetReg(unsigned Reg) {
if (!RegRets.count(Reg)) {
RegRets.insert(Reg);
- std::string name;
- name = "%ret";
- name += utostr(RegRets.size() - 1);
- RegNames[Reg] = name;
}
}
/// addArgReg - Adds a register to the set of function argument registers.
void addArgReg(unsigned Reg) {
RegArgs.insert(Reg);
- std::string name;
- name = "%param";
- name += utostr(RegArgs.size() - 1);
- RegNames[Reg] = name;
- }
-
- /// addVirtualRegister - Adds a virtual register to the set of all used
- /// registers in the function.
- void addVirtualRegister(const TargetRegisterClass *TRC, unsigned Reg) {
- std::string name;
-
- // Do not count registers that are argument/return registers.
- if (!RegRets.count(Reg) && !RegArgs.count(Reg)) {
- UsedRegs[TRC].push_back(Reg);
- if (TRC == PTX::RegPredRegisterClass)
- name = "%p";
- else if (TRC == PTX::RegI16RegisterClass)
- name = "%rh";
- else if (TRC == PTX::RegI32RegisterClass)
- name = "%r";
- else if (TRC == PTX::RegI64RegisterClass)
- name = "%rd";
- else if (TRC == PTX::RegF32RegisterClass)
- name = "%f";
- else if (TRC == PTX::RegF64RegisterClass)
- name = "%fd";
- else
- llvm_unreachable("Invalid register class");
-
- name += utostr(UsedRegs[TRC].size() - 1);
- RegNames[Reg] = name;
- }
}
/// getRegisterName - Returns the name of the specified virtual register. This
/// name is used during PTX emission.
- const char *getRegisterName(unsigned Reg) const {
- if (RegNames.count(Reg))
- return RegNames.find(Reg)->second.c_str();
+ std::string getRegisterName(unsigned Reg) const {
+ if (RegInfo.count(Reg)) {
+ const RegisterInfo& RI = RegInfo.lookup(Reg);
+ std::string Name;
+ raw_string_ostream NameStr(Name);
+ decodeRegisterName(NameStr, RI.Encoded);
+ NameStr.flush();
+ return Name;
+ }
else if (Reg == PTX::NoRegister)
return "%noreg";
else
llvm_unreachable("Register not in register name map");
}
- /// getNumRegistersForClass - Returns the number of virtual registers that are
- /// used for the specified register class.
- unsigned getNumRegistersForClass(const TargetRegisterClass *TRC) const {
- return UsedRegs.lookup(TRC).size();
+ /// getEncodedRegisterName - Returns the name of the encoded register.
+ std::string getEncodedRegisterName(unsigned EncodedReg) const {
+ std::string Name;
+ raw_string_ostream NameStr(Name);
+ decodeRegisterName(NameStr, EncodedReg);
+ NameStr.flush();
+ return Name;
+ }
+
+ /// getRegisterType - Returns the type of the specified virtual register.
+ unsigned getRegisterType(unsigned Reg) const {
+ if (RegInfo.count(Reg))
+ return RegInfo.lookup(Reg).Type;
+ else
+ llvm_unreachable("Unknown register");
+ }
+
+ /// getOffsetForRegister - Returns the offset of the virtual register
+ unsigned getOffsetForRegister(unsigned Reg) const {
+ if (RegInfo.count(Reg))
+ return RegInfo.lookup(Reg).Offset;
+ else
+ return 0;
}
/// getFrameSymbol - Returns the symbol name for the given FrameIndex.
@@ -148,13 +186,13 @@ public:
if (FrameSymbols.count(FrameIndex)) {
return FrameSymbols.lookup(FrameIndex).c_str();
} else {
- std::string Name = "__local";
- Name += utostr(FrameIndex);
+ std::string Name = "__local";
+ Name += utostr(FrameIndex);
// The whole point of caching this name is to ensure the pointer we pass
// to any getExternalSymbol() calls will remain valid for the lifetime of
// the back-end instance. This is to work around an issue in SelectionDAG
// where symbol names are expected to be life-long strings.
- FrameSymbols[FrameIndex] = Name;
+ FrameSymbols[FrameIndex] = Name;
return FrameSymbols[FrameIndex].c_str();
}
}
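
addRegister assigns each new register the next offset within its space, but inside the Reg space only registers of the same type share a numbering, which is what lets countRegisters drive the per-type ".reg" declarations in the asm printer. A standalone illustration of that offset rule, with made-up enum values, is sketched below.

// Standalone sketch of the offset rule in addRegister above.
#include <cstdio>
#include <vector>

struct Info { unsigned Type, Space; };
enum { Pred, B16, B32, B64, F32, F64 };  // assumed PTXRegisterType values
enum { Reg, Return, Argument };          // assumed PTXRegisterSpace values

static unsigned nextOffset(const std::vector<Info> &Seen,
                           unsigned Type, unsigned Space) {
  unsigned Offset = 0;
  for (const Info &RI : Seen)
    if (RI.Space == Space && (RI.Space != Reg || RI.Type == Type))
      ++Offset;
  return Offset;
}

int main() {
  std::vector<Info> Seen;
  // Two b32 registers, then one f32 register, all in the Reg space.
  unsigned O0 = nextOffset(Seen, B32, Reg); Seen.push_back({B32, Reg}); // 0
  unsigned O1 = nextOffset(Seen, B32, Reg); Seen.push_back({B32, Reg}); // 1
  unsigned O2 = nextOffset(Seen, F32, Reg); Seen.push_back({F32, Reg}); // 0
  // Under the decoding shown earlier in the patch these print as r0, r1, f0.
  std::printf("%u %u %u\n", O0, O1, O2);  // prints "0 1 0"
  return 0;
}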
diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp
index 292ea5e..4efdc27 100644
--- a/lib/Target/PTX/PTXTargetMachine.cpp
+++ b/lib/Target/PTX/PTXTargetMachine.cpp
@@ -67,30 +67,16 @@ namespace {
"e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
const char* DataLayout64 =
"e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
-
- // Copied from LLVMTargetMachine.cpp
- void printNoVerify(PassManagerBase &PM, const char *Banner) {
- if (PrintMachineCode)
- PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
- }
-
- void printAndVerify(PassManagerBase &PM,
- const char *Banner) {
- if (PrintMachineCode)
- PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
-
- //if (VerifyMachineCode)
- // PM.add(createMachineVerifierPass(Banner));
- }
}
// DataLayout and FrameLowering are filled with dummy data
PTXTargetMachine::PTXTargetMachine(const Target &T,
StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
DataLayout(is64Bit ? DataLayout64 : DataLayout32),
Subtarget(TT, CPU, FS, is64Bit),
FrameLowering(Subtarget),
@@ -101,16 +87,18 @@ PTXTargetMachine::PTXTargetMachine(const Target &T,
PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : PTXTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {
+ : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
}
PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : PTXTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {
+ : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
}
bool PTXTargetMachine::addInstSelector(PassManagerBase &PM) {
diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h
index 19f6c0f..22911f7 100644
--- a/lib/Target/PTX/PTXTargetMachine.h
+++ b/lib/Target/PTX/PTXTargetMachine.h
@@ -35,7 +35,7 @@ class PTXTargetMachine : public LLVMTargetMachine {
public:
PTXTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool is64Bit);
@@ -94,7 +94,7 @@ class PTX32TargetMachine : public PTXTargetMachine {
public:
PTX32TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
}; // class PTX32TargetMachine
@@ -103,7 +103,7 @@ class PTX64TargetMachine : public PTXTargetMachine {
public:
PTX64TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
}; // class PTX32TargetMachine
diff --git a/lib/Target/PTX/TargetInfo/CMakeLists.txt b/lib/Target/PTX/TargetInfo/CMakeLists.txt
index 2366e45..d9a5da3 100644
--- a/lib/Target/PTX/TargetInfo/CMakeLists.txt
+++ b/lib/Target/PTX/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMPTXInfo
PTXTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMPTXInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMPTXInfo PTXCommonTableGen)
diff --git a/lib/Target/PTX/TargetInfo/LLVMBuild.txt b/lib/Target/PTX/TargetInfo/LLVMBuild.txt
index 8e5285a..2cc30c4 100644
--- a/lib/Target/PTX/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/PTX/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = PTXInfo
parent = PTX
required_libraries = MC Support Target
add_to_library_groups = PTX
-
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index 05c1ffd..1b85495 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -27,20 +27,6 @@ add_llvm_target(PowerPCCodeGen
PPCSelectionDAGInfo.cpp
)
-add_llvm_library_dependencies(LLVMPowerPCCodeGen
- LLVMAnalysis
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMPowerPCAsmPrinter
- LLVMPowerPCDesc
- LLVMPowerPCInfo
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
index 1d857e2..a605cc4 100644
--- a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
+++ b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
@@ -4,9 +4,4 @@ add_llvm_library(LLVMPowerPCAsmPrinter
PPCInstPrinter.cpp
)
-add_llvm_library_dependencies(LLVMPowerPCAsmPrinter
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMPowerPCAsmPrinter PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt b/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt
index afbb2b1..7c691de 100644
--- a/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = PowerPCAsmPrinter
parent = PowerPC
required_libraries = MC Support
add_to_library_groups = PowerPC
-
diff --git a/lib/Target/PowerPC/LLVMBuild.txt b/lib/Target/PowerPC/LLVMBuild.txt
index 5baa988..95fac54 100644
--- a/lib/Target/PowerPC/LLVMBuild.txt
+++ b/lib/Target/PowerPC/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = InstPrinter MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = PowerPC
@@ -28,4 +31,3 @@ name = PowerPCCodeGen
parent = PowerPC
required_libraries = Analysis AsmPrinter CodeGen Core MC PowerPCAsmPrinter PowerPCDesc PowerPCInfo SelectionDAG Support Target
add_to_library_groups = PowerPC
-
diff --git a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
index c4041db..febf438 100644
--- a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
@@ -6,11 +6,4 @@ add_llvm_library(LLVMPowerPCDesc
PPCPredicates.cpp
)
-add_llvm_library_dependencies(LLVMPowerPCDesc
- LLVMMC
- LLVMPowerPCAsmPrinter
- LLVMPowerPCInfo
- LLVMSupport
- )
-
add_dependencies(LLVMPowerPCDesc PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt b/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt
index fc2da83..d3a567d 100644
--- a/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = PowerPCDesc
parent = PowerPC
required_libraries = MC PowerPCAsmPrinter PowerPCInfo Support
add_to_library_groups = PowerPC
-
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 9f2fd6d..34a5774 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -93,6 +93,16 @@ public:
// FIXME.
return false;
}
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // FIXME.
+ assert(0 && "RelaxInstruction() unimplemented");
+ return false;
+ }
+
void RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
// FIXME.
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 56f622e..5dc2d3d 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -365,11 +365,12 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case PPC::MFCRpseud:
+ case PPC::MFCR8pseud:
// Transform: %R3 = MFCRpseud %CR7
// Into: %R3 = MFCR ;; cr7
OutStreamer.AddComment(PPCInstPrinter::
getRegisterName(MI->getOperand(1).getReg()));
- TmpInst.setOpcode(PPC::MFCR);
+ TmpInst.setOpcode(Subtarget.isPPC64() ? PPC::MFCR8 : PPC::MFCR);
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
OutStreamer.EmitInstruction(TmpInst);
return;
@@ -441,7 +442,7 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
Directive = PPC::DIR_970;
if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400)
Directive = PPC::DIR_7400;
- if (Subtarget.isPPC64() && Directive < PPC::DIR_970)
+ if (Subtarget.isPPC64() && Directive < PPC::DIR_64)
Directive = PPC::DIR_64;
assert(Directive <= PPC::DIR_64 && "Directive out of range.");
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index 4a1f182..9d2f4d0 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -138,7 +138,8 @@ void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI,
unsigned OpNo) const {
const MachineOperand &MO = MI.getOperand(OpNo);
- assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) &&
+ assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MTCRF8 ||
+ MI.getOpcode() == PPC::MFOCRF) &&
(MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
return 0x80 >> getPPCRegisterNumbering(MO.getReg());
}
@@ -248,7 +249,8 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
if (MO.isReg()) {
// MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
// The GPR operand should come through here though.
- assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) ||
+ assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MTCRF8 &&
+ MI.getOpcode() != PPC::MFOCRF) ||
MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
return getPPCRegisterNumbering(MO.getReg());
}
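
get_crbitm_encoding above produces a one-hot mask byte with CR0 in the most significant bit. Assuming getPPCRegisterNumbering maps CR0..CR7 to 0..7, the masks come out as in this small sketch.

// Sketch of the CR-field mask computed by get_crbitm_encoding above.
#include <cstdio>

int main() {
  for (unsigned CRNum = 0; CRNum != 8; ++CRNum)
    std::printf("CR%u -> 0x%02X\n", CRNum, 0x80u >> CRNum);
  // CR0 -> 0x80, CR1 -> 0x40, ..., CR7 -> 0x01
  return 0;
}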
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 0b85fea..5c45018 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -64,7 +64,7 @@ static void RemoveVRSaveCode(MachineInstr *MI) {
// epilog blocks.
for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
// If last instruction is a return instruction, add an epilogue
- if (!I->empty() && I->back().getDesc().isReturn()) {
+ if (!I->empty() && I->back().isReturn()) {
bool FoundIt = false;
for (MBBI = I->end(); MBBI != I->begin(); ) {
--MBBI;
@@ -244,8 +244,10 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
if (MF.getFunction()->hasFnAttr(Attribute::Naked))
return false;
- return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() ||
- (GuaranteedTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall());
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects() ||
+ (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ MF.getInfo<PPCFunctionInfo>()->hasFastCall());
}
@@ -655,7 +657,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// Callee pop calling convention. Pop parameter/linkage area. Used for tail
// call optimization
- if (GuaranteedTailCallOpt && RetOpcode == PPC::BLR &&
+ if (MF.getTarget().Options.GuaranteedTailCallOpt && RetOpcode == PPC::BLR &&
MF.getFunction()->getCallingConv() == CallingConv::Fast) {
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
unsigned CallerAllocatedAmt = FI->getMinReservedArea();
@@ -758,7 +760,8 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Reserve stack space to move the linkage area to in case of a tail call.
int TCSPDelta = 0;
- if (GuaranteedTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
}
@@ -769,7 +772,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// FIXME: doesn't detect whether or not we need to spill vXX, which requires
// r0 for now.
- if (RegInfo->requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable.
+ if (RegInfo->requiresRegisterScavenging(MF))
if (needsFP(MF) || spillsCR(MF)) {
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
@@ -863,7 +866,8 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
// Take into account stack space reserved for tail calls.
int TCSPDelta = 0;
- if (GuaranteedTailCallOpt && (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
LowerBound = TCSPDelta;
}
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 3197fc8..ae317af 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -27,7 +27,6 @@ void PPCHazardRecognizer440::EmitInstruction(SUnit *SU) {
const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
if (!MCID) {
// This is a PPC pseudo-instruction.
- // FIXME: Should something else be done?
return;
}
@@ -62,6 +61,7 @@ void PPCHazardRecognizer440::EmitInstruction(SUnit *SU) {
PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii)
: TII(tii) {
+ LastWasBL8_ELF = false;
EndDispatchGroup();
}
@@ -80,12 +80,6 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
bool &isFirst, bool &isSingle,
bool &isCracked,
bool &isLoad, bool &isStore) {
- if ((int)Opcode >= 0) {
- isFirst = isSingle = isCracked = isLoad = isStore = false;
- return PPCII::PPC970_Pseudo;
- }
- Opcode = ~Opcode;
-
const MCInstrDesc &MCID = TII.get(Opcode);
isLoad = MCID.mayLoad();
@@ -102,29 +96,23 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
/// isLoadOfStoredAddress - If we have a load from the previously stored pointer
/// as indicated by StorePtr1/StorePtr2/StoreSize, return true.
bool PPCHazardRecognizer970::
-isLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const {
+isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset,
+ const Value *LoadValue) const {
for (unsigned i = 0, e = NumStores; i != e; ++i) {
// Handle exact and commuted addresses.
- if (Ptr1 == StorePtr1[i] && Ptr2 == StorePtr2[i])
- return true;
- if (Ptr2 == StorePtr1[i] && Ptr1 == StorePtr2[i])
+ if (LoadValue == StoreValue[i] && LoadOffset == StoreOffset[i])
return true;
// Okay, we don't have an exact match, if this is an indexed offset, see if
// we have overlap (which happens during fp->int conversion for example).
- if (StorePtr2[i] == Ptr2) {
- if (ConstantSDNode *StoreOffset = dyn_cast<ConstantSDNode>(StorePtr1[i]))
- if (ConstantSDNode *LoadOffset = dyn_cast<ConstantSDNode>(Ptr1)) {
- // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check
- // to see if the load and store actually overlap.
- int StoreOffs = StoreOffset->getZExtValue();
- int LoadOffs = LoadOffset->getZExtValue();
- if (StoreOffs < LoadOffs) {
- if (int(StoreOffs+StoreSize[i]) > LoadOffs) return true;
- } else {
- if (int(LoadOffs+LoadSize) > StoreOffs) return true;
- }
- }
+ if (StoreValue[i] == LoadValue) {
+ // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check
+ // to see if the load and store actually overlap.
+ if (StoreOffset[i] < LoadOffset) {
+ if (int64_t(StoreOffset[i]+StoreSize[i]) > LoadOffset) return true;
+ } else {
+ if (int64_t(LoadOffset+LoadSize) > StoreOffset[i]) return true;
+ }
}
}
return false;
@@ -138,13 +126,26 @@ ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970::
getHazardType(SUnit *SU, int Stalls) {
assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead");
- const SDNode *Node = SU->getNode()->getGluedMachineNode();
+ MachineInstr *MI = SU->getInstr();
+
+ if (MI->isDebugValue())
+ return NoHazard;
+
+ unsigned Opcode = MI->getOpcode();
+
+ // If the last instruction was a BL8_ELF, then the NOP must follow it
+ // directly (this is a strong requirement from the linker due to the ELF ABI).
+ // We return only Hazard (and not NoopHazard) because if the NOP is necessary
+ // then it will already be in the instruction stream (it is not always
+ // necessary; tail calls, for example, do not need it).
+ if (LastWasBL8_ELF && Opcode != PPC::NOP)
+ return Hazard;
+
bool isFirst, isSingle, isCracked, isLoad, isStore;
PPCII::PPC970_Unit InstrType =
- GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
+ GetInstrType(Opcode, isFirst, isSingle, isCracked,
isLoad, isStore);
if (InstrType == PPCII::PPC970_Pseudo) return NoHazard;
- unsigned Opcode = Node->getMachineOpcode();
// We can only issue a PPC970_First/PPC970_Single instruction (such as
// crand/mtspr/etc) if this is the first cycle of the dispatch group.
@@ -181,55 +182,10 @@ getHazardType(SUnit *SU, int Stalls) {
// If this is a load following a store, make sure it's not to the same or
// overlapping address.
- if (isLoad && NumStores) {
- unsigned LoadSize;
- switch (Opcode) {
- default: llvm_unreachable("Unknown load!");
- case PPC::LBZ: case PPC::LBZU:
- case PPC::LBZX:
- case PPC::LBZ8: case PPC::LBZU8:
- case PPC::LBZX8:
- case PPC::LVEBX:
- LoadSize = 1;
- break;
- case PPC::LHA: case PPC::LHAU:
- case PPC::LHAX:
- case PPC::LHZ: case PPC::LHZU:
- case PPC::LHZX:
- case PPC::LVEHX:
- case PPC::LHBRX:
- case PPC::LHA8: case PPC::LHAU8:
- case PPC::LHAX8:
- case PPC::LHZ8: case PPC::LHZU8:
- case PPC::LHZX8:
- LoadSize = 2;
- break;
- case PPC::LFS: case PPC::LFSU:
- case PPC::LFSX:
- case PPC::LWZ: case PPC::LWZU:
- case PPC::LWZX:
- case PPC::LWA:
- case PPC::LWAX:
- case PPC::LVEWX:
- case PPC::LWBRX:
- case PPC::LWZ8:
- case PPC::LWZX8:
- LoadSize = 4;
- break;
- case PPC::LFD: case PPC::LFDU:
- case PPC::LFDX:
- case PPC::LD: case PPC::LDU:
- case PPC::LDX:
- LoadSize = 8;
- break;
- case PPC::LVX:
- case PPC::LVXL:
- LoadSize = 16;
- break;
- }
-
- if (isLoadOfStoredAddress(LoadSize,
- Node->getOperand(0), Node->getOperand(1)))
+ if (isLoad && NumStores && !MI->memoperands_empty()) {
+ MachineMemOperand *MO = *MI->memoperands_begin();
+ if (isLoadOfStoredAddress(MO->getSize(),
+ MO->getOffset(), MO->getValue()))
return NoopHazard;
}
@@ -237,66 +193,29 @@ getHazardType(SUnit *SU, int Stalls) {
}
void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
- const SDNode *Node = SU->getNode()->getGluedMachineNode();
+ MachineInstr *MI = SU->getInstr();
+
+ if (MI->isDebugValue())
+ return;
+
+ unsigned Opcode = MI->getOpcode();
+ LastWasBL8_ELF = (Opcode == PPC::BL8_ELF);
+
bool isFirst, isSingle, isCracked, isLoad, isStore;
PPCII::PPC970_Unit InstrType =
- GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
+ GetInstrType(Opcode, isFirst, isSingle, isCracked,
isLoad, isStore);
if (InstrType == PPCII::PPC970_Pseudo) return;
- unsigned Opcode = Node->getMachineOpcode();
// Update structural hazard information.
if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true;
// Track the address stored to.
- if (isStore) {
- unsigned ThisStoreSize;
- switch (Opcode) {
- default: llvm_unreachable("Unknown store instruction!");
- case PPC::STB: case PPC::STB8:
- case PPC::STBU: case PPC::STBU8:
- case PPC::STBX: case PPC::STBX8:
- case PPC::STVEBX:
- ThisStoreSize = 1;
- break;
- case PPC::STH: case PPC::STH8:
- case PPC::STHU: case PPC::STHU8:
- case PPC::STHX: case PPC::STHX8:
- case PPC::STVEHX:
- case PPC::STHBRX:
- ThisStoreSize = 2;
- break;
- case PPC::STFS:
- case PPC::STFSU:
- case PPC::STFSX:
- case PPC::STWX: case PPC::STWX8:
- case PPC::STWUX:
- case PPC::STW: case PPC::STW8:
- case PPC::STWU:
- case PPC::STVEWX:
- case PPC::STFIWX:
- case PPC::STWBRX:
- ThisStoreSize = 4;
- break;
- case PPC::STD_32:
- case PPC::STDX_32:
- case PPC::STD:
- case PPC::STDU:
- case PPC::STFD:
- case PPC::STFDX:
- case PPC::STDX:
- case PPC::STDUX:
- ThisStoreSize = 8;
- break;
- case PPC::STVX:
- case PPC::STVXL:
- ThisStoreSize = 16;
- break;
- }
-
- StoreSize[NumStores] = ThisStoreSize;
- StorePtr1[NumStores] = Node->getOperand(1);
- StorePtr2[NumStores] = Node->getOperand(2);
+ if (isStore && NumStores < 4 && !MI->memoperands_empty()) {
+ MachineMemOperand *MO = *MI->memoperands_begin();
+ StoreSize[NumStores] = MO->getSize();
+ StoreOffset[NumStores] = MO->getOffset();
+ StoreValue[NumStores] = MO->getValue();
++NumStores;
}
@@ -319,3 +238,9 @@ void PPCHazardRecognizer970::AdvanceCycle() {
if (NumIssued == 5)
EndDispatchGroup();
}
+
+void PPCHazardRecognizer970::Reset() {
+ LastWasBL8_ELF = false;
+ EndDispatchGroup();
+}
+
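
After this change isLoadOfStoredAddress works from MachineMemOperands: once a load and a tracked store are known to share the same base IR Value, it checks whether their byte ranges overlap. A standalone sketch of that interval test, matching the branches above:

// Sketch of the overlap test used by isLoadOfStoredAddress above.
#include <cassert>
#include <cstdint>

static bool rangesOverlap(int64_t StoreOff, uint64_t StoreSize,
                          int64_t LoadOff,  uint64_t LoadSize) {
  if (StoreOff < LoadOff)
    return int64_t(StoreOff + StoreSize) > LoadOff;
  return int64_t(LoadOff + LoadSize) > StoreOff;
}

int main() {
  assert( rangesOverlap(0, 8, 4, 4));   // store [0,8) vs load [4,8): overlap
  assert(!rangesOverlap(0, 4, 4, 4));   // store [0,4) vs load [4,8): disjoint
  assert( rangesOverlap(8, 4, 0, 16));  // load [0,16) covers store [8,12)
  return 0;
}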
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h
index 32fac91..95d0d64 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -49,14 +49,18 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
// HasCTRSet - If the CTR register is set in this group, disallow BCTRL.
bool HasCTRSet;
+ // Was the last instruction issued a BL8_ELF?
+ bool LastWasBL8_ELF;
+
// StoredPtr - Keep track of the address of any store. If we see a load from
// the same address (or one that aliases it), disallow the store. We can have
// up to four stores in one dispatch group, hence we track up to 4.
//
// This is null if we haven't seen a store yet. We keep track of both
// operands of the store here, since we support [r+r] and [r+i] addressing.
- SDValue StorePtr1[4], StorePtr2[4];
- unsigned StoreSize[4];
+ const Value *StoreValue[4];
+ int64_t StoreOffset[4];
+ uint64_t StoreSize[4];
unsigned NumStores;
public:
@@ -64,6 +68,7 @@ public:
virtual HazardType getHazardType(SUnit *SU, int Stalls);
virtual void EmitInstruction(SUnit *SU);
virtual void AdvanceCycle();
+ virtual void Reset();
private:
/// EndDispatchGroup - Called when we are finishing a new dispatch group.
@@ -76,8 +81,8 @@ private:
bool &isFirst, bool &isSingle,bool &isCracked,
bool &isLoad, bool &isStore);
- bool isLoadOfStoredAddress(unsigned LoadSize,
- SDValue Ptr1, SDValue Ptr2) const;
+ bool isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset,
+ const Value *LoadValue) const;
};
} // end namespace llvm
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 3dee406..4a509a3 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -210,13 +210,13 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
// Find all return blocks, outputting a restore in each epilog.
for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
- if (!BB->empty() && BB->back().getDesc().isReturn()) {
+ if (!BB->empty() && BB->back().isReturn()) {
IP = BB->end(); --IP;
// Skip over all terminator instructions, which are part of the return
// sequence.
MachineBasicBlock::iterator I2 = IP;
- while (I2 != BB->begin() && (--I2)->getDesc().isTerminator())
+ while (I2 != BB->begin() && (--I2)->isTerminator())
IP = I2;
// Emit: MTVRSAVE InVRSave
@@ -1066,7 +1066,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Target = N->getOperand(1);
unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
- Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Target,
+ Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
Chain), 0);
return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 36d5c41..f3a3d17 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -103,6 +103,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// from FP_ROUND: that rounds to nearest, this rounds to zero.
setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
+ // We do not currently implement these libm ops for PowerPC.
+ setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
+
// PowerPC has no SREM/UREM instructions
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
@@ -146,9 +153,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
// PowerPC does not have ROTR
setOperationAction(ISD::ROTR, MVT::i32 , Expand);
@@ -332,7 +343,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
}
// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
@@ -1667,7 +1680,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Potential tail calls could cause overwriting of argument stack slots.
- bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast));
+ bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+ (CallConv == CallingConv::Fast));
unsigned PtrByteSize = 4;
// Assign locations to all of the incoming arguments.
@@ -1857,7 +1871,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
bool isPPC64 = PtrVT == MVT::i64;
// Potential tail calls could cause overwriting of argument stack slots.
- bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast));
+ bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+ (CallConv == CallingConv::Fast));
unsigned PtrByteSize = isPPC64 ? 8 : 4;
unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
@@ -2263,9 +2278,9 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
// Tail call needs the stack to be aligned.
- if (CC==CallingConv::Fast && GuaranteedTailCallOpt) {
- unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
- getStackAlignment();
+ if (CC == CallingConv::Fast && DAG.getTarget().Options.GuaranteedTailCallOpt){
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().
+ getFrameLowering()->getStackAlignment();
unsigned AlignMask = TargetAlign-1;
NumBytes = (NumBytes + AlignMask) & ~AlignMask;
}
@@ -2299,7 +2314,7 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const {
- if (!GuaranteedTailCallOpt)
+ if (!getTargetMachine().Options.GuaranteedTailCallOpt)
return false;
// Variable argument functions are not supported.
@@ -2752,7 +2767,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// the stack. Account for this here so these bytes can be pushed back on in
// PPCRegisterInfo::eliminateCallFramePseudoInstr.
int BytesCalleePops =
- (CallConv==CallingConv::Fast && GuaranteedTailCallOpt) ? NumBytes : 0;
+ (CallConv == CallingConv::Fast &&
+ getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
if (InFlag.getNode())
Ops.push_back(InFlag);
@@ -2868,7 +2884,8 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
// and restoring the callers stack pointer in this functions epilog. This is
// done because by tail calling the called function might overwrite the value
// in this function's (MF) stack pointer stack slot 0(SP).
- if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast)
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
// Count how many bytes are to be pushed on the stack, including the linkage
@@ -3075,7 +3092,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// and restoring the callers stack pointer in this functions epilog. This is
// done because by tail calling the called function might overwrite the value
// in this function's (MF) stack pointer stack slot 0(SP).
- if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast)
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
unsigned nAltivecParamsAtEnd = 0;
@@ -5754,7 +5772,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setFrameAddressIsTaken(true);
- bool is31 = (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()) &&
+ bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects()) &&
MFI->getStackSize() &&
!MF.getFunction()->hasFnAttr(Attribute::Naked);
unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index e88ad37..cdbc264 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -223,6 +223,18 @@ def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
(TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
+// 64-bit CR instructions
+def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins G8RC:$rS),
+ "mtcrf $FXM, $rS", BrMCRX>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+def MFCR8pseud: XFXForm_3<31, 19, (outs G8RC:$rT), (ins crbitm:$FXM),
+ "", SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins),
+ "mfcr $rT", SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
//===----------------------------------------------------------------------===//
// 64-bit SPR manipulation instrs.
@@ -469,6 +481,12 @@ def RLDICR : MDForm_1<30, 1,
(outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$ME),
"rldicr $rA, $rS, $SH, $ME", IntRotateD,
[]>, isPPC64;
+
+def RLWINM8 : MForm_2<21,
+ (outs G8RC:$rA), (ins G8RC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
+ []>;
+
} // End FXU Operations.
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index b9a6297..6d16f1d 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -33,8 +33,8 @@
#include "PPCGenInstrInfo.inc"
namespace llvm {
-extern cl::opt<bool> EnablePPC32RS; // FIXME (64-bit): See PPCRegisterInfo.cpp.
-extern cl::opt<bool> EnablePPC64RS; // FIXME (64-bit): See PPCRegisterInfo.cpp.
+extern cl::opt<bool> DisablePPC32RS;
+extern cl::opt<bool> DisablePPC64RS;
}
using namespace llvm;
@@ -48,25 +48,32 @@ PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
const TargetMachine *TM,
const ScheduleDAG *DAG) const {
- // Should use subtarget info to pick the right hazard recognizer. For
- // now, always return a PPC970 recognizer.
- const TargetInstrInfo *TII = TM->getInstrInfo();
- (void)TII;
- assert(TII && "No InstrInfo?");
-
unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective();
if (Directive == PPC::DIR_440) {
const InstrItineraryData *II = TM->getInstrItineraryData();
return new PPCHazardRecognizer440(II, DAG);
}
- else {
- // Disable the hazard recognizer for now, as it doesn't support
- // bottom-up scheduling.
- //return new PPCHazardRecognizer970(*TII);
- return new ScheduleHazardRecognizer();
- }
+
+ return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
}
+/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
+/// to use for this target when scheduling the DAG.
+ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
+ const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+
+ // Most subtargets use a PPC970 recognizer.
+ if (Directive != PPC::DIR_440) {
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ assert(TII && "No InstrInfo?");
+
+ return new PPCHazardRecognizer970(*TII);
+ }
+
+ return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG);
+}
unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
switch (MI->getOpcode()) {
@@ -338,6 +345,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
}
+// This function returns true if a CR spill is necessary and false otherwise.
bool
PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
unsigned SrcReg, bool isKill,
@@ -369,7 +377,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
FrameIdx));
} else {
// FIXME: this spills LR immediately to memory in one step. To do this,
- // we use R11, which we know cannot be used in the prolog/epilog. This is
+ // we use X11, which we know cannot be used in the prolog/epilog. This is
// a hack.
NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR8), PPC::X11));
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
@@ -388,9 +396,8 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
} else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) {
- if ((EnablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
- (EnablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
- // FIXME (64-bit): Enable
+ if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
+ (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
.addReg(SrcReg,
getKillRegState(isKill)),
@@ -403,11 +410,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
// We hack this on Darwin by reserving R2. It's probably broken on Linux
// at the moment.
+ bool is64Bit = TM.getSubtargetImpl()->isPPC64();
// We need to store the CR in the low 4-bits of the saved value. First,
// issue a MFCR to save all of the CRBits.
unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
- PPC::R2 : PPC::R0;
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCRpseud), ScratchReg)
+ (is64Bit ? PPC::X2 : PPC::R2) :
+ (is64Bit ? PPC::X0 : PPC::R0);
+ NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::MFCR8pseud :
+ PPC::MFCRpseud), ScratchReg)
.addReg(SrcReg, getKillRegState(isKill)));
// If the saved register wasn't CR0, shift the bits left so that they are
@@ -415,12 +425,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
if (SrcReg != PPC::CR0) {
unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4;
// rlwinm scratch, scratch, ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
+ NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::RLWINM8 :
+ PPC::RLWINM), ScratchReg)
.addReg(ScratchReg).addImm(ShiftBits)
.addImm(0).addImm(31));
}
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(is64Bit ?
+ PPC::STW8 : PPC::STW))
.addReg(ScratchReg,
getKillRegState(isKill)),
FrameIdx));
@@ -504,7 +516,7 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
NewMIs.back()->addMemOperand(MF, MMO);
}
-void
+bool
PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
@@ -524,8 +536,8 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
FrameIdx));
} else {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD),
- PPC::R11), FrameIdx));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::R11));
+ PPC::X11), FrameIdx));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::X11));
}
} else if (PPC::F8RCRegisterClass->hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg),
@@ -534,28 +546,37 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
FrameIdx));
} else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) {
- // FIXME: We need a scatch reg here. The trouble with using R0 is that
- // it's possible for the stack frame to be so big the save location is
- // out of range of immediate offsets, necessitating another register.
- // We hack this on Darwin by reserving R2. It's probably broken on Linux
- // at the moment.
- unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
- PPC::R2 : PPC::R0;
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
- ScratchReg), FrameIdx));
-
- // If the reloaded register isn't CR0, shift the bits right so that they are
- // in the right CR's slot.
- if (DestReg != PPC::CR0) {
- unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
- // rlwinm r11, r11, 32-ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
- .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
- .addImm(31));
+ if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
+ (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_CR), DestReg)
+ , FrameIdx));
+ return true;
+ } else {
+ // FIXME: We need a scratch reg here. The trouble with using R0 is that
+ // it's possible for the stack frame to be so big the save location is
+ // out of range of immediate offsets, necessitating another register.
+ // We hack this on Darwin by reserving R2. It's probably broken on Linux
+ // at the moment.
+ unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
+ PPC::R2 : PPC::R0;
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ ScratchReg), FrameIdx));
+
+ // If the reloaded register isn't CR0, shift the bits right so that they are
+ // in the right CR's slot.
+ if (DestReg != PPC::CR0) {
+ unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
+ // rlwinm r11, r11, 32-ShiftBits, 0, 31.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
+ .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
+ .addImm(31));
+ }
+
+ NewMIs.push_back(BuildMI(MF, DL, get(TM.getSubtargetImpl()->isPPC64() ?
+ PPC::MTCRF8 : PPC::MTCRF), DestReg)
+ .addReg(ScratchReg));
}
-
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg)
- .addReg(ScratchReg));
} else if (PPC::CRBITRCRegisterClass->hasSubClassEq(RC)) {
unsigned Reg = 0;
@@ -600,6 +621,8 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
} else {
llvm_unreachable("Unknown regclass!");
}
+
+ return false;
}
void
@@ -612,7 +635,10 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
SmallVector<MachineInstr*, 4> NewMIs;
DebugLoc DL;
if (MI != MBB.end()) DL = MI->getDebugLoc();
- LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
+ if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs)) {
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setSpillsCR();
+ }
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
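
For the CR spill/restore path above, rlwinm with mask bits 0..31 is a plain 32-bit rotate left: the spill rotates by ShiftBits = 4 * (CR number) and the restore rotates by 32 - ShiftBits, so the two cancel. A small arithmetic sketch (the CR value is arbitrary):

// Sketch of the rotate round-trip used when spilling/restoring a CR above.
#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t V, unsigned N) {
  N &= 31;
  return N ? (V << N) | (V >> (32 - N)) : V;
}

int main() {
  // Example: spilling CR2, so ShiftBits = 2 * 4 = 8.
  unsigned ShiftBits = 2 * 4;
  uint32_t MFCRValue = 0x12345678;                      // arbitrary CR contents
  uint32_t Spilled   = rotl32(MFCRValue, ShiftBits);
  uint32_t Restored  = rotl32(Spilled, 32 - ShiftBits);
  assert(Restored == MFCRValue);                        // the two rotates cancel
  return 0;
}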
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 90bacc9..e90f8cb 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -72,7 +72,7 @@ class PPCInstrInfo : public PPCGenInstrInfo {
unsigned SrcReg, bool isKill, int FrameIdx,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs) const;
- void LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
+ bool LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs) const;
@@ -88,6 +88,9 @@ public:
ScheduleHazardRecognizer *
CreateTargetHazardRecognizer(const TargetMachine *TM,
const ScheduleDAG *DAG) const;
+ ScheduleHazardRecognizer *
+ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const;
unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 17f63e0..d4c9d10 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -349,7 +349,7 @@ def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
//===----------------------------------------------------------------------===//
// PowerPC Instruction Predicate Definitions.
-def FPContractions : Predicate<"!NoExcessFPPrecision">;
+def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">;
def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">;
def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">;
def IsBookE : Predicate<"PPCSubTarget.isBookE()">;
@@ -399,7 +399,14 @@ let usesCustomInserter = 1, // Expanded after instruction selection.
// SPILL_CR - Indicate that we're dumping the CR register, so we'll need to
// scavenge a register for it.
-def SPILL_CR : Pseudo<(outs), (ins GPRC:$cond, memri:$F),
+let mayStore = 1 in
+def SPILL_CR : Pseudo<(outs), (ins CRRC:$cond, memri:$F),
+ "", []>;
+
+// RESTORE_CR - Indicate that we're restoring the CR register (previously
+// spilled), so we'll need to scavenge a register for it.
+let mayLoad = 1 in
+def RESTORE_CR : Pseudo<(outs CRRC:$cond), (ins memri:$F),
"", []>;
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
@@ -1091,7 +1098,7 @@ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins),
"mfspr $rT, 256", IntGeneral>,
PPC970_DGroup_First, PPC970_Unit_FXU;
-def MTCRF : XFXForm_5<31, 144, (outs), (ins crbitm:$FXM, GPRC:$rS),
+def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS),
"mtcrf $FXM, $rS", BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 3ba9260..27f7f4a 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -46,15 +46,14 @@
#define GET_REGINFO_TARGET_DESC
#include "PPCGenRegisterInfo.inc"
-// FIXME (64-bit): Eventually enable by default.
namespace llvm {
-cl::opt<bool> EnablePPC32RS("enable-ppc32-regscavenger",
+cl::opt<bool> DisablePPC32RS("disable-ppc32-regscavenger",
cl::init(false),
- cl::desc("Enable PPC32 register scavenger"),
+ cl::desc("Disable PPC32 register scavenger"),
cl::Hidden);
-cl::opt<bool> EnablePPC64RS("enable-ppc64-regscavenger",
+cl::opt<bool> DisablePPC64RS("disable-ppc64-regscavenger",
cl::init(false),
- cl::desc("Enable PPC64 register scavenger"),
+ cl::desc("Disable PPC64 register scavenger"),
cl::Hidden);
}
@@ -63,8 +62,8 @@ using namespace llvm;
// FIXME (64-bit): Should be inlined.
bool
PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const {
- return ((EnablePPC32RS && !Subtarget.isPPC64()) ||
- (EnablePPC64RS && Subtarget.isPPC64()));
+ return ((!DisablePPC32RS && !Subtarget.isPPC64()) ||
+ (!DisablePPC64RS && Subtarget.isPPC64()));
}
PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
@@ -120,10 +119,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
PPC::V24, PPC::V25, PPC::V26, PPC::V27,
PPC::V28, PPC::V29, PPC::V30, PPC::V31,
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
-
PPC::LR, 0
};
@@ -149,10 +144,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
PPC::V24, PPC::V25, PPC::V26, PPC::V27,
PPC::V28, PPC::V29, PPC::V30, PPC::V31,
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
-
0
};
// 64-bit Darwin calling convention.
@@ -174,10 +165,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
PPC::V24, PPC::V25, PPC::V26, PPC::V27,
PPC::V28, PPC::V29, PPC::V30, PPC::V31,
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
-
PPC::LR8, 0
};
@@ -203,10 +190,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
PPC::V24, PPC::V25, PPC::V26, PPC::V27,
PPC::V28, PPC::V29, PPC::V30, PPC::V31,
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
-
0
};
@@ -247,9 +230,6 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::R13);
Reserved.set(PPC::R31);
- if (!requiresRegisterScavenging(MF))
- Reserved.set(PPC::R0); // FIXME (64-bit): Remove
-
Reserved.set(PPC::X0);
Reserved.set(PPC::X1);
Reserved.set(PPC::X13);
@@ -259,7 +239,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (Subtarget.isSVR4ABI()) {
Reserved.set(PPC::X2);
}
- // Reserve R2 on Darwin to hack around the problem of save/restore of CR
+ // Reserve X2 on Darwin to hack around the problem of save/restore of CR
// when the stack frame is too big to address directly; we need two regs.
// This is a hack.
if (Subtarget.isDarwinABI()) {
@@ -291,6 +271,8 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case PPC::F4RCRegClassID:
case PPC::VRRCRegClassID:
return 32 - DefaultSafety;
+ case PPC::CRRCRegClassID:
+ return 8 - DefaultSafety;
}
}
@@ -301,7 +283,8 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
void PPCRegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- if (GuaranteedTailCallOpt && I->getOpcode() == PPC::ADJCALLSTACKUP) {
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ I->getOpcode() == PPC::ADJCALLSTACKUP) {
// Add (actually subtract) back the amount the callee popped on return.
if (int CalleeAmt = I->getOperand(1).getImm()) {
bool is64Bit = Subtarget.isPPC64();
@@ -476,28 +459,32 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
unsigned FrameIndex, int SPAdj,
RegScavenger *RS) const {
// Get the instruction.
- MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset>, <FI>
+ MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset>
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc dl = MI.getDebugLoc();
- const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
- unsigned Reg = findScratchRegister(II, RS, RC, SPAdj);
- unsigned SrcReg = MI.getOperand(0).getReg();
+ // FIXME: Once LLVM supports creating virtual registers here, or the register
+ // scavenger can return multiple registers, stop using reserved registers
+ // here.
+ (void) SPAdj;
+ (void) RS;
+
bool LP64 = Subtarget.isPPC64();
+ unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) :
+ (LP64 ? PPC::X0 : PPC::R0);
+ unsigned SrcReg = MI.getOperand(0).getReg();
// We need to store the CR in the low 4-bits of the saved value. First, issue
// an MFCRpseud to save all of the CRBits and, if needed, kill the SrcReg.
- BuildMI(MBB, II, dl, TII.get(PPC::MFCRpseud), Reg)
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFCR8pseud : PPC::MFCRpseud), Reg)
.addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
// If the saved register wasn't CR0, shift the bits left so that they are in
// CR0's slot.
if (SrcReg != PPC::CR0)
// rlwinm rA, rA, ShiftBits, 0, 31.
- BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg)
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
.addReg(Reg, RegState::Kill)
.addImm(getPPCRegisterNumbering(SrcReg) * 4)
.addImm(0)
@@ -511,6 +498,48 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
MBB.erase(II);
}
+void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex, int SPAdj,
+ RegScavenger *RS) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; <DestReg> = RESTORE_CR <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ // FIXME: Once LLVM supports creating virtual registers here, or the register
+ // scavenger can return multiple registers, stop using reserved registers
+ // here.
+ (void) SPAdj;
+ (void) RS;
+
+ bool LP64 = Subtarget.isPPC64();
+ unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) :
+ (LP64 ? PPC::X0 : PPC::R0);
+ unsigned DestReg = MI.getOperand(0).getReg();
+ assert(MI.definesRegister(DestReg) &&
+ "RESTORE_CR does not define its destination");
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LWZ8 : PPC::LWZ),
+ Reg), FrameIndex);
+
+ // If the reloaded register isn't CR0, shift the bits right so that they are
+ // in the right CR's slot.
+ if (DestReg != PPC::CR0) {
+ unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
+ // rlwinm r11, r11, 32-ShiftBits, 0, 31.
+ BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg)
+ .addReg(Reg).addImm(32-ShiftBits).addImm(0)
+ .addImm(31);
+ }
+
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTCRF8 : PPC::MTCRF), DestReg)
+ .addReg(Reg);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
void
PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS) const {
@@ -556,16 +585,23 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
return;
}
- // Special case for pseudo-op SPILL_CR.
- if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable by default.
+ // Special case for pseudo-ops SPILL_CR and RESTORE_CR.
+ if (requiresRegisterScavenging(MF)) {
if (OpC == PPC::SPILL_CR) {
lowerCRSpilling(II, FrameIndex, SPAdj, RS);
return;
+ } else if (OpC == PPC::RESTORE_CR) {
+ lowerCRRestore(II, FrameIndex, SPAdj, RS);
+ return;
}
+ }
// Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
+
+ bool is64Bit = Subtarget.isPPC64();
MI.getOperand(FIOperandNo).ChangeToRegister(TFI->hasFP(MF) ?
- PPC::R31 : PPC::R1,
+ (is64Bit ? PPC::X31 : PPC::R31) :
+ (is64Bit ? PPC::X1 : PPC::R1),
false);
// Figure out if the offset in the instruction is shifted right two bits. This
@@ -611,19 +647,19 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// The offset doesn't fit into a single register, scavenge one to build the
// offset in.
- // FIXME: figure out what SPAdj is doing here.
- // FIXME (64-bit): Use "findScratchRegister".
unsigned SReg;
- if (requiresRegisterScavenging(MF))
- SReg = findScratchRegister(II, RS, &PPC::GPRCRegClass, SPAdj);
- else
- SReg = PPC::R0;
+ if (requiresRegisterScavenging(MF)) {
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ SReg = findScratchRegister(II, RS, is64Bit ? G8RC : GPRC, SPAdj);
+ } else
+ SReg = is64Bit ? PPC::X0 : PPC::R0;
// Insert a set of rA with the full offset value before the ld, st, or add
- BuildMI(MBB, II, dl, TII.get(PPC::LIS), SReg)
+ BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SReg)
.addImm(Offset >> 16);
- BuildMI(MBB, II, dl, TII.get(PPC::ORI), SReg)
+ BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), SReg)
.addReg(SReg, RegState::Kill)
.addImm(Offset);
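
The rotate amounts used by lowerCRSpilling and lowerCRRestore are complementary: spilling CRn rotates its 4-bit field left by n*4 into CR0's slot, and restoring rotates left by 32-n*4 (i.e. right by n*4) to put it back. A minimal standalone sketch of that arithmetic, assuming only the 4-bits-per-field numbering that getPPCRegisterNumbering() returns (this is not code from the patch):

    // Sketch: mirrors the shift math in lowerCRSpilling/lowerCRRestore above.
    #include <cassert>
    #include <cstdio>

    int main() {
      for (unsigned CRField = 0; CRField != 8; ++CRField) {
        unsigned ShiftBits  = CRField * 4;            // getPPCRegisterNumbering(CRn) * 4
        unsigned SpillRot   = ShiftBits;              // rlwinm rA, rA, ShiftBits, 0, 31
        unsigned RestoreRot = (32 - ShiftBits) % 32;  // rlwinm rA, rA, 32-ShiftBits, 0, 31
        assert((SpillRot + RestoreRot) % 32 == 0 && "restore must undo the spill rotate");
        printf("CR%u: spill rotl %2u, restore rotl %2u\n", CRField, SpillRot, RestoreRot);
      }
      return 0;
    }

For CR0 both rotates are zero, which is why the code above skips the RLWINM entirely when the field is CR0.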
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index f70a594..faf690f 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -57,6 +57,8 @@ public:
int SPAdj, RegScavenger *RS) const;
void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex,
int SPAdj, RegScavenger *RS) const;
+ void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex,
+ int SPAdj, RegScavenger *RS) const;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS = NULL) const;
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 8acf75c..baa0eb5 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "PPCSubtarget.h"
+#include "PPCRegisterInfo.h"
#include "PPC.h"
#include "llvm/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"
@@ -140,3 +141,22 @@ bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV,
return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
GV->hasCommonLinkage() || isDecl;
}
+
+bool PPCSubtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ if (DarwinDirective == PPC::DIR_440)
+ return false;
+
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+ CriticalPathRCs.clear();
+
+ if (isPPC64())
+ CriticalPathRCs.push_back(&PPC::G8RCRegClass);
+ else
+ CriticalPathRCs.push_back(&PPC::GPRCRegClass);
+
+ return OptLevel >= CodeGenOpt::Default;
+}
+
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index d2b853d..62b2424 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -148,6 +148,10 @@ public:
bool isDarwinABI() const { return isDarwin(); }
bool isSVR4ABI() const { return !isDarwin(); }
+ /// enablePostRAScheduler - True at CodeGenOpt::Default and above.
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
};
} // End llvm namespace
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index de8fca0..8e71c46 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -28,10 +28,11 @@ extern "C" void LLVMInitializePowerPCTarget() {
PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS, is64Bit),
DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this),
FrameLowering(Subtarget), JITInfo(*this, is64Bit),
@@ -45,17 +46,19 @@ bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; }
PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : PPCTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {
+ : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
}
PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : PPCTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {
+ : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
}
@@ -81,7 +84,7 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
if (Subtarget.isPPC64())
// Temporary workaround for the inability of PPC64 JIT to handle jump
// tables.
- DisableJumpTables = true;
+ Options.DisableJumpTables = true;
// Inform the subtarget that we are in JIT mode. FIXME: does this break macho
// writing?
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 03b27c6..0427876 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -41,7 +41,7 @@ class PPCTargetMachine : public LLVMTargetMachine {
public:
PPCTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool is64Bit);
@@ -79,7 +79,7 @@ public:
class PPC32TargetMachine : public PPCTargetMachine {
public:
PPC32TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
@@ -89,7 +89,7 @@ public:
class PPC64TargetMachine : public PPCTargetMachine {
public:
PPC64TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
diff --git a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
index f63111f..fdb8a62 100644
--- a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
+++ b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMPowerPCInfo
PowerPCTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMPowerPCInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMPowerPCInfo PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
index f51b417..f77d85b 100644
--- a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = PowerPCInfo
parent = PowerPC
required_libraries = MC Support Target
add_to_library_groups = PowerPC
-
diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt
index 9687951..56ee7c2 100644
--- a/lib/Target/Sparc/CMakeLists.txt
+++ b/lib/Target/Sparc/CMakeLists.txt
@@ -22,17 +22,5 @@ add_llvm_target(SparcCodeGen
SparcSelectionDAGInfo.cpp
)
-add_llvm_library_dependencies(LLVMSparcCodeGen
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSparcDesc
- LLVMSparcInfo
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index dab35e5..9295408 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -100,7 +100,7 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
- if (I->getDesc().hasDelaySlot()) {
+ if (I->hasDelaySlot()) {
MachineBasicBlock::iterator D = MBB.end();
MachineBasicBlock::iterator J = I;
@@ -149,7 +149,7 @@ Filler::findDelayInstr(MachineBasicBlock &MBB,
}
//Call's delay filler can def some of call's uses.
- if (slot->getDesc().isCall())
+ if (slot->isCall())
insertCallUses(slot, RegUses);
else
insertDefsUses(slot, RegDefs, RegUses);
@@ -170,7 +170,7 @@ Filler::findDelayInstr(MachineBasicBlock &MBB,
if (I->hasUnmodeledSideEffects()
|| I->isInlineAsm()
|| I->isLabel()
- || I->getDesc().hasDelaySlot()
+ || I->hasDelaySlot()
|| isDelayFiller(MBB, I))
break;
@@ -194,13 +194,13 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
if (candidate->isImplicitDef() || candidate->isKill())
return true;
- if (candidate->getDesc().mayLoad()) {
+ if (candidate->mayLoad()) {
sawLoad = true;
if (sawStore)
return true;
}
- if (candidate->getDesc().mayStore()) {
+ if (candidate->mayStore()) {
if (sawStore)
return true;
sawStore = true;
@@ -298,13 +298,13 @@ bool Filler::isDelayFiller(MachineBasicBlock &MBB,
return false;
if (candidate->getOpcode() == SP::UNIMP)
return true;
- const MCInstrDesc &prevdesc = (--candidate)->getDesc();
- return prevdesc.hasDelaySlot();
+ --candidate;
+ return candidate->hasDelaySlot();
}
bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize)
{
- if (!I->getDesc().isCall())
+ if (!I->isCall())
return false;
unsigned structSizeOpNum = 0;
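
The Sparc changes in this file (and the matching hunks in SparcAsmPrinter and SparcInstrInfo below) only switch from going through the MCInstrDesc to the equivalent convenience accessors on MachineInstr itself; the predicate being tested is the same. A minimal sketch of the two spellings, assuming the in-tree llvm/CodeGen/MachineInstr.h header (not code from the patch):

    // Sketch: both forms read the same MCInstrDesc flag; the patch prefers the
    // shorter MachineInstr accessor.
    #include "llvm/CodeGen/MachineInstr.h"

    static bool fillsDelaySlotOld(const llvm::MachineInstr *MI) {
      return MI->getDesc().hasDelaySlot();   // old spelling, removed above
    }

    static bool fillsDelaySlotNew(const llvm::MachineInstr *MI) {
      return MI->hasDelaySlot();             // new spelling used above
    }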
diff --git a/lib/Target/Sparc/LLVMBuild.txt b/lib/Target/Sparc/LLVMBuild.txt
index 38c797f..fe20d2f 100644
--- a/lib/Target/Sparc/LLVMBuild.txt
+++ b/lib/Target/Sparc/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = Sparc
@@ -27,4 +30,3 @@ name = SparcCodeGen
parent = Sparc
required_libraries = AsmPrinter CodeGen Core MC SelectionDAG SparcDesc SparcInfo Support Target
add_to_library_groups = Sparc
-
diff --git a/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt b/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
index d3bdf0b..9d4db4d 100644
--- a/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
@@ -3,10 +3,4 @@ add_llvm_library(LLVMSparcDesc
SparcMCAsmInfo.cpp
)
-add_llvm_library_dependencies(LLVMSparcDesc
- LLVMMC
- LLVMSparcInfo
- LLVMSupport
- )
-
add_dependencies(LLVMSparcDesc SparcCommonTableGen)
diff --git a/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt b/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt
index a339cec..97f8f16 100644
--- a/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = SparcDesc
parent = Sparc
required_libraries = MC SparcInfo Support
add_to_library_groups = Sparc
-
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index deb39d9..7548bbf 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -236,9 +236,9 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
// Check if the last terminator is an unconditional branch.
MachineBasicBlock::const_iterator I = Pred->end();
- while (I != Pred->begin() && !(--I)->getDesc().isTerminator())
+ while (I != Pred->begin() && !(--I)->isTerminator())
; // Noop
- return I == Pred->end() || !I->getDesc().isBarrier();
+ return I == Pred->end() || !I->isBarrier();
}
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 25104d1..3608d3b 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -763,7 +763,9 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FMA , MVT::f32, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::ROTL , MVT::i32, Expand);
setOperationAction(ISD::ROTR , MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index 7a6bf50..5290d42 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -133,7 +133,7 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
break;
//Terminator is not a branch
- if (!I->getDesc().isBranch())
+ if (!I->isBranch())
return true;
//Handle Unconditional branches
@@ -195,7 +195,7 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
.addMBB(UnCondBrIter->getOperand(0).getMBB()).addImm(BranchCode);
BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(SP::BA))
.addMBB(TargetBB);
- MBB.addSuccessor(TargetBB);
+
OldInst->eraseFromParent();
UnCondBrIter->eraseFromParent();
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 7dff799..8e16fd7 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -26,10 +26,11 @@ extern "C" void LLVMInitializeSparcTarget() {
///
SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool is64bit)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS, is64bit),
DataLayout(Subtarget.getDataLayout()),
TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget),
@@ -52,16 +53,20 @@ bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM){
SparcV8TargetMachine::SparcV8TargetMachine(const Target &T,
StringRef TT, StringRef CPU,
- StringRef FS, Reloc::Model RM,
+ StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL)
- : SparcTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {
+ : SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
}
SparcV9TargetMachine::SparcV9TargetMachine(const Target &T,
StringRef TT, StringRef CPU,
- StringRef FS, Reloc::Model RM,
+ StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL)
- : SparcTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {
+ : SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
}
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 63bfa5d..cedc1e3 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -34,9 +34,9 @@ class SparcTargetMachine : public LLVMTargetMachine {
SparcFrameLowering FrameLowering;
public:
SparcTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL, bool is64bit);
+ CodeGenOpt::Level OL, bool is64bit);
virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetFrameLowering *getFrameLowering() const {
@@ -65,6 +65,7 @@ class SparcV8TargetMachine : public SparcTargetMachine {
public:
SparcV8TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
@@ -75,6 +76,7 @@ class SparcV9TargetMachine : public SparcTargetMachine {
public:
SparcV9TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
diff --git a/lib/Target/Sparc/TargetInfo/CMakeLists.txt b/lib/Target/Sparc/TargetInfo/CMakeLists.txt
index a076023..b0d031e 100644
--- a/lib/Target/Sparc/TargetInfo/CMakeLists.txt
+++ b/lib/Target/Sparc/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMSparcInfo
SparcTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMSparcInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMSparcInfo SparcCommonTableGen)
diff --git a/lib/Target/Sparc/TargetInfo/LLVMBuild.txt b/lib/Target/Sparc/TargetInfo/LLVMBuild.txt
index 81c9032..b5c320f 100644
--- a/lib/Target/Sparc/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/Sparc/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = SparcInfo
parent = Sparc
required_libraries = MC Support Target
add_to_library_groups = Sparc
-
diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp
index d52ecb3..440f9ad 100644
--- a/lib/Target/TargetInstrInfo.cpp
+++ b/lib/Target/TargetInstrInfo.cpp
@@ -13,7 +13,6 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/ErrorHandling.h"
@@ -73,23 +72,6 @@ TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
}
-int
-TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
- SDNode *DefNode, unsigned DefIdx,
- SDNode *UseNode, unsigned UseIdx) const {
- if (!ItinData || ItinData->isEmpty())
- return -1;
-
- if (!DefNode->isMachineOpcode())
- return -1;
-
- unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
- if (!UseNode->isMachineOpcode())
- return ItinData->getOperandCycle(DefClass, DefIdx);
- unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass();
- return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
-}
-
int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const {
@@ -99,17 +81,6 @@ int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
return ItinData->getStageLatency(MI->getDesc().getSchedClass());
}
-int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
- SDNode *N) const {
- if (!ItinData || ItinData->isEmpty())
- return 1;
-
- if (!N->isMachineOpcode())
- return 1;
-
- return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
-}
-
bool TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI,
unsigned DefIdx) const {
@@ -129,19 +100,6 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
}
-bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isTerminator()) return false;
-
- // Conditional branch is a special case.
- if (MCID.isBranch() && !MCID.isBarrier())
- return true;
- if (!MCID.isPredicable())
- return true;
- return !isPredicated(MI);
-}
-
-
/// Measure the specified inline asm to determine an approximation of its
/// length.
/// Comments (which run till the next SeparatorString or newline) do not
diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp
index aa2e014..768facb 100644
--- a/lib/Target/TargetLibraryInfo.cpp
+++ b/lib/Target/TargetLibraryInfo.cpp
@@ -22,15 +22,96 @@ char TargetLibraryInfo::ID = 0;
const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
{
- "memset",
+ "acos",
+ "acosl",
+ "acosf",
+ "asin",
+ "asinl",
+ "asinf",
+ "atan",
+ "atanl",
+ "atanf",
+ "atan2",
+ "atan2l",
+ "atan2f",
+ "ceil",
+ "ceill",
+ "ceilf",
+ "copysign",
+ "copysignf",
+ "copysignl",
+ "cos",
+ "cosl",
+ "cosf",
+ "cosh",
+ "coshl",
+ "coshf",
+ "exp",
+ "expl",
+ "expf",
+ "exp2",
+ "exp2l",
+ "exp2f",
+ "expm1",
+ "expm1l",
+ "expl1f",
+ "fabs",
+ "fabsl",
+ "fabsf",
+ "floor",
+ "floorl",
+ "floorf",
+ "fiprintf",
+ "fmod",
+ "fmodl",
+ "fmodf",
+ "fputs",
+ "fwrite",
+ "iprintf",
+ "log",
+ "logl",
+ "logf",
+ "log2",
+ "log2l",
+ "log2f",
+ "log10",
+ "log10l",
+ "log10f",
+ "log1p",
+ "log1pl",
+ "log1pf",
"memcpy",
"memmove",
+ "memset",
"memset_pattern16",
- "iprintf",
+ "nearbyint",
+ "nearbyintf",
+ "nearbyintl",
+ "pow",
+ "powf",
+ "powl",
+ "rint",
+ "rintf",
+ "rintl",
+ "sin",
+ "sinl",
+ "sinf",
+ "sinh",
+ "sinhl",
+ "sinhf",
"siprintf",
- "fiprintf",
- "fwrite",
- "fputs"
+ "sqrt",
+ "sqrtl",
+ "sqrtf",
+ "tan",
+ "tanl",
+ "tanf",
+ "tanh",
+ "tanhl",
+ "tanhf",
+ "trunc",
+ "truncf",
+ "truncl"
};
/// initialize - Initialize the set of available library functions based on the
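
The string table above is indexed by the LibFunc enum from TargetLibraryInfo.h, so the order of the names must track the enum exactly; the new entries are kept alphabetical to make that easier to verify. A minimal sketch of the indexing scheme, using hypothetical names (MyLibFunc and MyStandardNames are illustrative only, not the real identifiers):

    // Sketch: the enum value doubles as the index into the parallel name table.
    enum MyLibFunc { my_acos, my_acosl, my_acosf, MyNumLibFuncs };

    static const char *MyStandardNames[MyNumLibFuncs] = { "acos", "acosl", "acosf" };

    static const char *nameOf(MyLibFunc F) {
      return MyStandardNames[F];   // only valid while table order matches the enum
    }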
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 56b7b69..fc8b67b 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -48,7 +48,7 @@ void TargetLoweringObjectFile::Initialize(MCContext &ctx,
TargetLoweringObjectFile::~TargetLoweringObjectFile() {
}
-static bool isSuitableForBSS(const GlobalVariable *GV) {
+static bool isSuitableForBSS(const GlobalVariable *GV, bool NoZerosInBSS) {
const Constant *C = GV->getInitializer();
// Must have zero initializer.
@@ -133,7 +133,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
// Handle thread-local data first.
if (GVar->isThreadLocal()) {
- if (isSuitableForBSS(GVar))
+ if (isSuitableForBSS(GVar, TM.Options.NoZerosInBSS))
return SectionKind::getThreadBSS();
return SectionKind::getThreadData();
}
@@ -143,7 +143,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
return SectionKind::getCommon();
// Variable can be easily put to BSS section.
- if (isSuitableForBSS(GVar)) {
+ if (isSuitableForBSS(GVar, TM.Options.NoZerosInBSS)) {
if (GVar->hasLocalLinkage())
return SectionKind::getBSSLocal();
else if (GVar->hasExternalLinkage())
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 805e16e..fb7bbbb 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -11,8 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -24,153 +22,11 @@ using namespace llvm;
//
namespace llvm {
- bool LessPreciseFPMADOption;
- bool PrintMachineCode;
- bool NoFramePointerElim;
- bool NoFramePointerElimNonLeaf;
- bool NoExcessFPPrecision;
- bool UnsafeFPMath;
- bool NoInfsFPMath;
- bool NoNaNsFPMath;
- bool HonorSignDependentRoundingFPMathOption;
- bool UseSoftFloat;
- FloatABI::ABIType FloatABIType;
- bool NoImplicitFloat;
- bool NoZerosInBSS;
- bool JITExceptionHandling;
- bool JITEmitDebugInfo;
- bool JITEmitDebugInfoToDisk;
- bool GuaranteedTailCallOpt;
- unsigned StackAlignmentOverride;
- bool RealignStack;
- bool DisableJumpTables;
bool StrongPHIElim;
bool HasDivModLibcall;
bool AsmVerbosityDefault(false);
- bool EnableSegmentedStacks;
}
-static cl::opt<bool, true>
-PrintCode("print-machineinstrs",
- cl::desc("Print generated machine code"),
- cl::location(PrintMachineCode), cl::init(false));
-static cl::opt<bool, true>
-DisableFPElim("disable-fp-elim",
- cl::desc("Disable frame pointer elimination optimization"),
- cl::location(NoFramePointerElim),
- cl::init(false));
-static cl::opt<bool, true>
-DisableFPElimNonLeaf("disable-non-leaf-fp-elim",
- cl::desc("Disable frame pointer elimination optimization for non-leaf funcs"),
- cl::location(NoFramePointerElimNonLeaf),
- cl::init(false));
-static cl::opt<bool, true>
-DisableExcessPrecision("disable-excess-fp-precision",
- cl::desc("Disable optimizations that may increase FP precision"),
- cl::location(NoExcessFPPrecision),
- cl::init(false));
-static cl::opt<bool, true>
-EnableFPMAD("enable-fp-mad",
- cl::desc("Enable less precise MAD instructions to be generated"),
- cl::location(LessPreciseFPMADOption),
- cl::init(false));
-static cl::opt<bool, true>
-EnableUnsafeFPMath("enable-unsafe-fp-math",
- cl::desc("Enable optimizations that may decrease FP precision"),
- cl::location(UnsafeFPMath),
- cl::init(false));
-static cl::opt<bool, true>
-EnableNoInfsFPMath("enable-no-infs-fp-math",
- cl::desc("Enable FP math optimizations that assume no +-Infs"),
- cl::location(NoInfsFPMath),
- cl::init(false));
-static cl::opt<bool, true>
-EnableNoNaNsFPMath("enable-no-nans-fp-math",
- cl::desc("Enable FP math optimizations that assume no NaNs"),
- cl::location(NoNaNsFPMath),
- cl::init(false));
-static cl::opt<bool, true>
-EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math",
- cl::Hidden,
- cl::desc("Force codegen to assume rounding mode can change dynamically"),
- cl::location(HonorSignDependentRoundingFPMathOption),
- cl::init(false));
-static cl::opt<bool, true>
-GenerateSoftFloatCalls("soft-float",
- cl::desc("Generate software floating point library calls"),
- cl::location(UseSoftFloat),
- cl::init(false));
-static cl::opt<llvm::FloatABI::ABIType, true>
-FloatABIForCalls("float-abi",
- cl::desc("Choose float ABI type"),
- cl::location(FloatABIType),
- cl::init(FloatABI::Default),
- cl::values(
- clEnumValN(FloatABI::Default, "default",
- "Target default float ABI type"),
- clEnumValN(FloatABI::Soft, "soft",
- "Soft float ABI (implied by -soft-float)"),
- clEnumValN(FloatABI::Hard, "hard",
- "Hard float ABI (uses FP registers)"),
- clEnumValEnd));
-static cl::opt<bool, true>
-DontPlaceZerosInBSS("nozero-initialized-in-bss",
- cl::desc("Don't place zero-initialized symbols into bss section"),
- cl::location(NoZerosInBSS),
- cl::init(false));
-static cl::opt<bool, true>
-EnableJITExceptionHandling("jit-enable-eh",
- cl::desc("Emit exception handling information"),
- cl::location(JITExceptionHandling),
- cl::init(false));
-// In debug builds, make this default to true.
-#ifdef NDEBUG
-#define EMIT_DEBUG false
-#else
-#define EMIT_DEBUG true
-#endif
-static cl::opt<bool, true>
-EmitJitDebugInfo("jit-emit-debug",
- cl::desc("Emit debug information to debugger"),
- cl::location(JITEmitDebugInfo),
- cl::init(EMIT_DEBUG));
-#undef EMIT_DEBUG
-static cl::opt<bool, true>
-EmitJitDebugInfoToDisk("jit-emit-debug-to-disk",
- cl::Hidden,
- cl::desc("Emit debug info objfiles to disk"),
- cl::location(JITEmitDebugInfoToDisk),
- cl::init(false));
-
-static cl::opt<bool, true>
-EnableGuaranteedTailCallOpt("tailcallopt",
- cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."),
- cl::location(GuaranteedTailCallOpt),
- cl::init(false));
-static cl::opt<unsigned, true>
-OverrideStackAlignment("stack-alignment",
- cl::desc("Override default stack alignment"),
- cl::location(StackAlignmentOverride),
- cl::init(0));
-static cl::opt<bool, true>
-EnableRealignStack("realign-stack",
- cl::desc("Realign stack if needed"),
- cl::location(RealignStack),
- cl::init(true));
-static cl::opt<bool, true>
-DisableSwitchTables(cl::Hidden, "disable-jump-tables",
- cl::desc("Do not generate jump tables."),
- cl::location(DisableJumpTables),
- cl::init(false));
-static cl::opt<bool, true>
-EnableStrongPHIElim(cl::Hidden, "strong-phi-elim",
- cl::desc("Use strong PHI elimination."),
- cl::location(StrongPHIElim),
- cl::init(false));
-static cl::opt<std::string>
-TrapFuncName("trap-func", cl::Hidden,
- cl::desc("Emit a call to trap function rather than a trap instruction"),
- cl::init(""));
static cl::opt<bool>
DataSections("fdata-sections",
cl::desc("Emit data into separate sections"),
@@ -179,18 +35,14 @@ static cl::opt<bool>
FunctionSections("ffunction-sections",
cl::desc("Emit functions into separate sections"),
cl::init(false));
-static cl::opt<bool, true>
-SegmentedStacks("segmented-stacks",
- cl::desc("Use segmented stacks if possible."),
- cl::location(EnableSegmentedStacks),
- cl::init(false));
//---------------------------------------------------------------------------
// TargetMachine Class
//
TargetMachine::TargetMachine(const Target &T,
- StringRef TT, StringRef CPU, StringRef FS)
+ StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options)
: TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS),
CodeGenInfo(0), AsmInfo(0),
MCRelaxAll(false),
@@ -198,11 +50,8 @@ TargetMachine::TargetMachine(const Target &T,
MCSaveTempLabels(false),
MCUseLoc(true),
MCUseCFI(true),
- MCUseDwarfDirectory(false) {
- // Typically it will be subtargets that will adjust FloatABIType from Default
- // to Soft or Hard.
- if (UseSoftFloat)
- FloatABIType = FloatABI::Soft;
+ MCUseDwarfDirectory(false),
+ Options(Options) {
}
TargetMachine::~TargetMachine() {
@@ -258,36 +107,3 @@ void TargetMachine::setDataSections(bool V) {
DataSections = V;
}
-namespace llvm {
- /// DisableFramePointerElim - This returns true if frame pointer elimination
- /// optimization should be disabled for the given machine function.
- bool DisableFramePointerElim(const MachineFunction &MF) {
- // Check to see if we should eliminate non-leaf frame pointers and then
- // check to see if we should eliminate all frame pointers.
- if (NoFramePointerElimNonLeaf && !NoFramePointerElim) {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return MFI->hasCalls();
- }
-
- return NoFramePointerElim;
- }
-
- /// LessPreciseFPMAD - This flag return true when -enable-fp-mad option
- /// is specified on the command line. When this flag is off(default), the
- /// code generator is not allowed to generate mad (multiply add) if the
- /// result is "less precise" than doing those operations individually.
- bool LessPreciseFPMAD() { return UnsafeFPMath || LessPreciseFPMADOption; }
-
- /// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
- /// that the rounding mode of the FPU can change from its default.
- bool HonorSignDependentRoundingFPMath() {
- return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption;
- }
-
- /// getTrapFunctionName - If this returns a non-empty string, this means isel
- /// should lower Intrinsic::trap to a call to the specified function name
- /// instead of an ISD::TRAP node.
- StringRef getTrapFunctionName() {
- return TrapFuncName;
- }
-}
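
The net effect of this hunk is that the codegen flags formerly held in file-scope globals now live in a TargetOptions object that every LLVMTargetMachine constructor (see the PPC and Sparc constructors earlier in the patch) receives and stores as TM.Options. A minimal sketch of populating such an object, assuming only the fields this patch itself reads (GuaranteedTailCallOpt, NoZerosInBSS, DisableJumpTables); the full field list lives in TargetOptions.h:

    // Sketch: options are now carried per-TargetMachine instead of via cl::opt globals.
    #include "llvm/Target/TargetOptions.h"

    static llvm::TargetOptions makeOptions() {
      llvm::TargetOptions Options;
      Options.GuaranteedTailCallOpt = true;   // was the -tailcallopt global
      Options.NoZerosInBSS          = false;  // was -nozero-initialized-in-bss
      Options.DisableJumpTables     = false;  // was -disable-jump-tables
      return Options;                         // handed to the XXXTargetMachine ctors above
    }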
diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp
index 67239b8..2689837 100644
--- a/lib/Target/TargetRegisterInfo.cpp
+++ b/lib/Target/TargetRegisterInfo.cpp
@@ -13,8 +13,6 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/X86/AsmParser/CMakeLists.txt b/lib/Target/X86/AsmParser/CMakeLists.txt
index 94aca7a..47489bb 100644
--- a/lib/Target/X86/AsmParser/CMakeLists.txt
+++ b/lib/Target/X86/AsmParser/CMakeLists.txt
@@ -5,12 +5,4 @@ add_llvm_library(LLVMX86AsmParser
X86AsmParser.cpp
)
-add_llvm_library_dependencies(LLVMX86AsmParser
- LLVMMC
- LLVMMCParser
- LLVMSupport
- LLVMX86Desc
- LLVMX86Info
- )
-
add_dependencies(LLVMX86AsmParser X86CommonTableGen)
diff --git a/lib/Target/X86/AsmParser/LLVMBuild.txt b/lib/Target/X86/AsmParser/LLVMBuild.txt
index 6c2405a..9f94d5d 100644
--- a/lib/Target/X86/AsmParser/LLVMBuild.txt
+++ b/lib/Target/X86/AsmParser/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = X86AsmParser
parent = X86
required_libraries = MC MCParser Support X86Desc X86Info
add_to_library_groups = X86
-
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 4542d4b..be15899 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -51,21 +51,6 @@ endif()
add_llvm_target(X86CodeGen ${sources})
-add_llvm_library_dependencies(LLVMX86CodeGen
- LLVMAnalysis
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- LLVMX86AsmPrinter
- LLVMX86Desc
- LLVMX86Info
- LLVMX86Utils
- )
-
add_subdirectory(AsmParser)
add_subdirectory(Disassembler)
add_subdirectory(InstPrinter)
diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt
index 4f570d5..0cd6db9 100644
--- a/lib/Target/X86/Disassembler/CMakeLists.txt
+++ b/lib/Target/X86/Disassembler/CMakeLists.txt
@@ -5,12 +5,6 @@ add_llvm_library(LLVMX86Disassembler
X86DisassemblerDecoder.c
)
-add_llvm_library_dependencies(LLVMX86Disassembler
- LLVMMC
- LLVMSupport
- LLVMX86Info
- )
-
# workaround for hanging compilation on MSVC9 and 10
if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
set_property(
diff --git a/lib/Target/X86/Disassembler/LLVMBuild.txt b/lib/Target/X86/Disassembler/LLVMBuild.txt
index cd748cf..cac7adf 100644
--- a/lib/Target/X86/Disassembler/LLVMBuild.txt
+++ b/lib/Target/X86/Disassembler/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = X86Disassembler
parent = X86
required_libraries = MC Support X86Info
add_to_library_groups = X86
-
diff --git a/lib/Target/X86/InstPrinter/CMakeLists.txt b/lib/Target/X86/InstPrinter/CMakeLists.txt
index 2a2b5db..28e2460 100644
--- a/lib/Target/X86/InstPrinter/CMakeLists.txt
+++ b/lib/Target/X86/InstPrinter/CMakeLists.txt
@@ -6,10 +6,4 @@ add_llvm_library(LLVMX86AsmPrinter
X86InstComments.cpp
)
-add_llvm_library_dependencies(LLVMX86AsmPrinter
- LLVMMC
- LLVMSupport
- LLVMX86Utils
- )
-
add_dependencies(LLVMX86AsmPrinter X86CommonTableGen)
diff --git a/lib/Target/X86/InstPrinter/LLVMBuild.txt b/lib/Target/X86/InstPrinter/LLVMBuild.txt
index fb01323..6868dde 100644
--- a/lib/Target/X86/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/X86/InstPrinter/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = X86AsmPrinter
parent = X86
required_libraries = MC Support X86Utils
add_to_library_groups = X86
-
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 6e87efa..6e4b1b9 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -106,28 +106,92 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// FALL THROUGH.
case X86::PUNPCKHBWrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKHMask(16, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v16i8, ShuffleMask);
+ break;
+ case X86::VPUNPCKHBWrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHBWrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v16i8, ShuffleMask);
+ break;
+ case X86::VPUNPCKHBWYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHBWYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v32i8, ShuffleMask);
break;
case X86::PUNPCKHWDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKHWDrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKHMask(8, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v8i16, ShuffleMask);
+ break;
+ case X86::VPUNPCKHWDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHWDrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v8i16, ShuffleMask);
+ break;
+ case X86::VPUNPCKHWDYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHWDYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v16i16, ShuffleMask);
break;
case X86::PUNPCKHDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKHDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKHMask(4, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v4i32, ShuffleMask);
+ break;
+ case X86::VPUNPCKHDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v4i32, ShuffleMask);
+ break;
+ case X86::VPUNPCKHDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v8i32, ShuffleMask);
break;
case X86::PUNPCKHQDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKHQDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKHMask(2, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v2i64, ShuffleMask);
+ break;
+ case X86::VPUNPCKHQDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHQDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v2i64, ShuffleMask);
+ break;
+ case X86::VPUNPCKHQDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHQDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v4i64, ShuffleMask);
break;
case X86::PUNPCKLBWrr:
@@ -135,42 +199,117 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// FALL THROUGH.
case X86::PUNPCKLBWrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLBWMask(16, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v16i8, ShuffleMask);
+ break;
+ case X86::VPUNPCKLBWrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLBWrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v16i8, ShuffleMask);
+ break;
+ case X86::VPUNPCKLBWYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLBWYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v32i8, ShuffleMask);
break;
case X86::PUNPCKLWDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKLWDrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLWDMask(8, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v8i16, ShuffleMask);
+ break;
+ case X86::VPUNPCKLWDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLWDrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v8i16, ShuffleMask);
+ break;
+ case X86::VPUNPCKLWDYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLWDYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v16i16, ShuffleMask);
break;
case X86::PUNPCKLDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKLDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLDQMask(4, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v4i32, ShuffleMask);
+ break;
+ case X86::VPUNPCKLDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v4i32, ShuffleMask);
+ break;
+ case X86::VPUNPCKLDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v8i32, ShuffleMask);
break;
case X86::PUNPCKLQDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKLQDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLQDQMask(2, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v2i64, ShuffleMask);
+ break;
+ case X86::VPUNPCKLQDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLQDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v2i64, ShuffleMask);
+ break;
+ case X86::VPUNPCKLQDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLQDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v4i64, ShuffleMask);
break;
case X86::SHUFPDrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::SHUFPDrmi:
- DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask);
+ DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VSHUFPDrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VSHUFPDrmi:
- DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask);
+ DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VSHUFPDYrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VSHUFPDYrmi:
+ DecodeSHUFPMask(MVT::v4f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -179,14 +318,25 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::SHUFPSrmi:
- DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask);
+ DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VSHUFPSrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VSHUFPSrmi:
- DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask);
+ DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VSHUFPSYrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VSHUFPSYrmi:
+ DecodeSHUFPMask(MVT::v8f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -195,14 +345,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKLPDrm:
- DecodeUNPCKLPMask(MVT::v2f64, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKLPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPDrm:
- DecodeUNPCKLPMask(MVT::v2f64, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -210,7 +360,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPDYrm:
- DecodeUNPCKLPMask(MVT::v4f64, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v4f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -218,14 +368,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKLPSrm:
- DecodeUNPCKLPMask(MVT::v4f32, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v4f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKLPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPSrm:
- DecodeUNPCKLPMask(MVT::v4f32, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v4f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -233,7 +383,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPSYrm:
- DecodeUNPCKLPMask(MVT::v8f32, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v8f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -241,14 +391,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKHPDrm:
- DecodeUNPCKHPMask(MVT::v2f64, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKHPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKHPDrm:
- DecodeUNPCKHPMask(MVT::v2f64, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -256,7 +406,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKHPDYrm:
- DecodeUNPCKLPMask(MVT::v4f64, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v4f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -264,14 +414,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKHPSrm:
- DecodeUNPCKHPMask(MVT::v4f32, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v4f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKHPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKHPSrm:
- DecodeUNPCKHPMask(MVT::v4f32, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v4f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -279,34 +429,52 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKHPSYrm:
- DecodeUNPCKHPMask(MVT::v8f32, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v8f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERMILPSri:
- DecodeVPERMILPSMask(4, MI->getOperand(2).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPSmi:
+ DecodeVPERMILPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERMILPSYri:
- DecodeVPERMILPSMask(8, MI->getOperand(2).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPSYmi:
+ DecodeVPERMILPMask(MVT::v8f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERMILPDri:
- DecodeVPERMILPDMask(2, MI->getOperand(2).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPDmi:
+ DecodeVPERMILPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERMILPDYri:
- DecodeVPERMILPDMask(4, MI->getOperand(2).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPDYmi:
+ DecodeVPERMILPMask(MVT::v4f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERM2F128rr:
- DecodeVPERM2F128Mask(MI->getOperand(3).getImm(), ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
+ case X86::VPERM2I128rr:
Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPERM2F128rm:
+ case X86::VPERM2I128rm:
+ DecodeVPERM2F128Mask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
}
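
As a quick standalone illustration (not part of the patch): the VPERM2F128/VPERM2I128 cases above take the selector immediate from the last operand and pass it to DecodeVPERM2F128Mask, whose body is not shown in this hunk. A minimal sketch with plain integers instead of the in-tree EVT/SmallVector types, assuming the usual selector semantics (imm bits [1:0] pick the 128-bit half written to the low destination lane, bits [5:4] the half for the high lane, halves 0-1 from the first source and 2-3 from the second):

#include <cstdio>
#include <vector>

// NumElts is the element count of one 256-bit source, e.g. 8 for v8f32.
static std::vector<unsigned> decodeVPerm2x128(unsigned NumElts, unsigned Imm) {
  std::vector<unsigned> Mask;
  unsigned HalfSize = NumElts / 2;
  unsigned LoBegin = (Imm & 0x3) * HalfSize;         // half feeding the low lane
  unsigned HiBegin = ((Imm >> 4) & 0x3) * HalfSize;  // half feeding the high lane
  for (unsigned i = 0; i != HalfSize; ++i) Mask.push_back(LoBegin + i);
  for (unsigned i = 0; i != HalfSize; ++i) Mask.push_back(HiBegin + i);
  return Mask;
}

int main() {
  // vperm2f128 $0x31: low lane <- high half of src1, high lane <- high half
  // of src2. For v8f32 this prints: 4 5 6 7 12 13 14 15
  for (unsigned Idx : decodeVPerm2x128(8, 0x31))
    std::printf("%u ", Idx);
  std::printf("\n");
  return 0;
}
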
diff --git a/lib/Target/X86/LLVMBuild.txt b/lib/Target/X86/LLVMBuild.txt
index 514566c..87305e0 100644
--- a/lib/Target/X86/LLVMBuild.txt
+++ b/lib/Target/X86/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo Utils
+
[component_0]
type = TargetGroup
name = X86
@@ -30,4 +33,3 @@ name = X86CodeGen
parent = X86
required_libraries = Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target X86AsmPrinter X86Desc X86Info X86Utils
add_to_library_groups = X86
-
diff --git a/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/lib/Target/X86/MCTargetDesc/CMakeLists.txt
index 264e791..ab2ebb4 100644
--- a/lib/Target/X86/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/X86/MCTargetDesc/CMakeLists.txt
@@ -6,13 +6,6 @@ add_llvm_library(LLVMX86Desc
X86MachObjectWriter.cpp
)
-add_llvm_library_dependencies(LLVMX86Desc
- LLVMMC
- LLVMSupport
- LLVMX86AsmPrinter
- LLVMX86Info
- )
-
add_dependencies(LLVMX86Desc X86CommonTableGen)
# Hack: we need to include 'main' target directory to grab private headers
diff --git a/lib/Target/X86/MCTargetDesc/LLVMBuild.txt b/lib/Target/X86/MCTargetDesc/LLVMBuild.txt
index 3d09301..9e1d29c 100644
--- a/lib/Target/X86/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/X86/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = X86Desc
parent = X86
required_libraries = MC Support X86AsmPrinter X86Info
add_to_library_groups = X86
-
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 69ad7d7..87b2b05 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -107,6 +107,11 @@ public:
bool MayNeedRelaxation(const MCInst &Inst) const;
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const;
+
void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
@@ -244,6 +249,14 @@ bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
return hasExp && !hasRIP;
}
+bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // Relax if the value is too big for a (signed) i8.
+ return int64_t(Value) != int64_t(int8_t(Value));
+}
+
// FIXME: Can tblgen help at all here to verify there aren't other instructions
// we can relax?
void X86AsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
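
The new fixupNeedsRelaxation hook relaxes whenever the fixup value no longer fits a signed 8-bit displacement. A self-contained sketch of that predicate (same expression as the patch, with a few asserts marking the boundary cases):

#include <cassert>
#include <cstdint>

static bool needsRelaxation(uint64_t Value) {
  // Relax if the value does not round-trip through a signed i8.
  return int64_t(Value) != int64_t(int8_t(Value));
}

int main() {
  assert(!needsRelaxation(0x7F));            // 127 fits in an i8
  assert(!needsRelaxation(uint64_t(-128)));  // -128 fits in an i8
  assert(needsRelaxation(0x80));             // 128 does not -> relax to rel32
  assert(needsRelaxation(uint64_t(-129)));   // -129 does not -> relax
  return 0;
}
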
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index c50f785..662ac1d 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -304,6 +304,12 @@ namespace X86II {
// TAXD - Prefix before and after 0x0F. Combination of TA and XD.
TAXD = 19 << Op0Shift,
+ // XOP8 - Prefix to include use of imm byte.
+ XOP8 = 20 << Op0Shift,
+
+ // XOP9 - Prefix to exclude use of imm byte.
+ XOP9 = 21 << Op0Shift,
+
//===------------------------------------------------------------------===//
// REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
// They are used to specify GPRs and SSE registers, 64-bit operand size,
@@ -418,7 +424,16 @@ namespace X86II {
/// storing a classifier in the imm8 field. To simplify our implementation,
/// we handle this by storing the classifier in the opcode field and using
/// this flag to indicate that the encoder should do the wacky 3DNow! thing.
- Has3DNow0F0FOpcode = 1U << 7
+ Has3DNow0F0FOpcode = 1U << 7,
+
+ /// XOP_W - Same bit as VEX_W. Used to indicate swapping of
+ /// operands 3 and 4 to be encoded in ModRM or I8IMM. This is used
+ /// for FMA4 and XOP instructions.
+ XOP_W = 1U << 8,
+
+ /// XOP - Opcode prefix used by XOP instructions.
+ XOP = 1U << 9
+
};
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
@@ -488,9 +503,12 @@ namespace X86II {
return 0;
case X86II::MRMSrcMem: {
bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ bool HasXOP_W = (TSFlags >> X86II::VEXShift) & X86II::XOP_W;
unsigned FirstMemOp = 1;
if (HasVEX_4V)
++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
+ if (HasXOP_W)
+ ++FirstMemOp;// Skip the register source (which is encoded in I8IMM).
// FIXME: Maybe lea should have its own form? This is a horrible hack.
//if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
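
The getMemoryOperandNo change above only adjusts where the memory operand starts in an MRMSrcMem instruction. A tiny paraphrase of that bookkeeping, with plain bools standing in for the TSFlags tests:

// Operand 0 is the destination; each register source encoded outside ModRM
// (VEX.vvvv, or the I8IMM byte used by FMA4/XOP) pushes the memory operand
// one slot further to the right.
static unsigned firstMemOperand(bool HasVEX_4V, bool HasXOP_W) {
  unsigned FirstMemOp = 1;
  if (HasVEX_4V) ++FirstMemOp;  // skip the VEX_VVVV register source
  if (HasXOP_W)  ++FirstMemOp;  // skip the I8IMM register source
  return FirstMemOp;
}
// A plain reg-mem op yields 1; an FMA4 rm form (both flags set) yields 3.
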
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 2703100..eb64ad1 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -125,7 +125,19 @@ getNonexecutableStackSection(MCContext &Ctx) const {
0, SectionKind::getMetadata());
}
-X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) {
+X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
+ if (Triple.getArch() == Triple::x86_64) {
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+ }
+
+ AsmTransCBE = x86_asm_table;
+ AssemblerDialect = AsmWriterFlavor;
+
+ TextAlignFillValue = 0x90;
+}
+
+X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
if (Triple.getArch() == Triple::x86_64) {
GlobalPrefix = "";
PrivateGlobalPrefix = ".L";
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
index 2cd4c8e..5d619e8 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
@@ -38,8 +38,12 @@ namespace llvm {
virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const;
};
- struct X86MCAsmInfoCOFF : public MCAsmInfoCOFF {
- explicit X86MCAsmInfoCOFF(const Triple &Triple);
+ struct X86MCAsmInfoMicrosoft : public MCAsmInfoMicrosoft {
+ explicit X86MCAsmInfoMicrosoft(const Triple &Triple);
+ };
+
+ struct X86MCAsmInfoGNUCOFF : public MCAsmInfoGNUCOFF {
+ explicit X86MCAsmInfoGNUCOFF(const Triple &Triple);
};
} // namespace llvm
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 1ab469c..8e14cb1 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -169,23 +169,36 @@ static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
return false;
}
-/// StartsWithGlobalOffsetTable - Return true for the simple cases where this
-/// expression starts with _GLOBAL_OFFSET_TABLE_. This is a needed to support
-/// PIC on ELF i386 as that symbol is magic. We check only simple case that
+/// StartsWithGlobalOffsetTable - Check if this expression starts with
+/// _GLOBAL_OFFSET_TABLE_ and if it is of the form
+/// _GLOBAL_OFFSET_TABLE_-symbol. This is needed to support PIC on ELF
+/// i386 as _GLOBAL_OFFSET_TABLE_ is magical. We check only the simple cases that
/// are known to be used: _GLOBAL_OFFSET_TABLE_ by itself or at the start
/// of a binary expression.
-static bool StartsWithGlobalOffsetTable(const MCExpr *Expr) {
+enum GlobalOffsetTableExprKind {
+ GOT_None,
+ GOT_Normal,
+ GOT_SymDiff
+};
+static GlobalOffsetTableExprKind
+StartsWithGlobalOffsetTable(const MCExpr *Expr) {
+ const MCExpr *RHS = 0;
if (Expr->getKind() == MCExpr::Binary) {
const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Expr);
Expr = BE->getLHS();
+ RHS = BE->getRHS();
}
if (Expr->getKind() != MCExpr::SymbolRef)
- return false;
+ return GOT_None;
const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
const MCSymbol &S = Ref->getSymbol();
- return S.getName() == "_GLOBAL_OFFSET_TABLE_";
+ if (S.getName() != "_GLOBAL_OFFSET_TABLE_")
+ return GOT_None;
+ if (RHS && RHS->getKind() == MCExpr::SymbolRef)
+ return GOT_SymDiff;
+ return GOT_Normal;
}
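
StartsWithGlobalOffsetTable now distinguishes a bare _GLOBAL_OFFSET_TABLE_ reference from the _GLOBAL_OFFSET_TABLE_-symbol form. A toy illustration of the classification and of how EmitImmediate (next hunk) reacts to it; ToyExpr and its field names are invented for the example and are not LLVM API:

#include <string>

enum GlobalOffsetTableExprKind { GOT_None, GOT_Normal, GOT_SymDiff };

struct ToyExpr {
  std::string LeadingSymbol;  // symbol at the start of the expression
  bool HasSymbolRHS;          // binary expression whose RHS is another symbol
};

static GlobalOffsetTableExprKind classify(const ToyExpr &E) {
  if (E.LeadingSymbol != "_GLOBAL_OFFSET_TABLE_")
    return GOT_None;
  return E.HasSymbolRHS ? GOT_SymDiff : GOT_Normal;
}

// classify({"_GLOBAL_OFFSET_TABLE_", false}) == GOT_Normal: the fixup becomes
//   reloc_global_offset_table and ImmOffset is biased by CurByte.
// classify({"_GLOBAL_OFFSET_TABLE_", true})  == GOT_SymDiff: same relocation,
//   but ImmOffset is left at 0.
// classify({"foo", false})                   == GOT_None: fixup left alone.
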
void X86MCCodeEmitter::
@@ -209,12 +222,15 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
// If we have an immoffset, add it to the expression.
if ((FixupKind == FK_Data_4 ||
- FixupKind == MCFixupKind(X86::reloc_signed_4byte)) &&
- StartsWithGlobalOffsetTable(Expr)) {
- assert(ImmOffset == 0);
-
- FixupKind = MCFixupKind(X86::reloc_global_offset_table);
- ImmOffset = CurByte;
+ FixupKind == MCFixupKind(X86::reloc_signed_4byte))) {
+ GlobalOffsetTableExprKind Kind = StartsWithGlobalOffsetTable(Expr);
+ if (Kind != GOT_None) {
+ assert(ImmOffset == 0);
+
+ FixupKind = MCFixupKind(X86::reloc_global_offset_table);
+ if (Kind == GOT_Normal)
+ ImmOffset = CurByte;
+ }
}
// If the fixup is pc-relative, we need to bias the value to be relative to
@@ -415,6 +431,13 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// opcode extension, or ignored, depending on the opcode byte)
unsigned char VEX_W = 0;
+ // XOP_W: opcode specific, same bit as VEX_W, but used to
+ // swap operands 3 and 4 for FMA4 and XOP instructions
+ unsigned char XOP_W = 0;
+
+ // XOP: Use XOP prefix byte 0x8f instead of VEX.
+ unsigned char XOP = 0;
+
// VEX_5M (VEX m-mmmmm field):
//
// 0b00000: Reserved for future use
@@ -422,7 +445,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// 0b00010: implied 0F 38 leading opcode bytes
// 0b00011: implied 0F 3A leading opcode bytes
// 0b00100-0b11111: Reserved for future use
- //
+ // 0b01000: XOP map select - 08h instructions with imm byte
+ // 0b01001: XOP map select - 09h instructions with no imm byte
unsigned char VEX_5M = 0x1;
// VEX_4V (VEX vvvv field): a register specifier
@@ -453,6 +477,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W)
VEX_W = 1;
+ if ((TSFlags >> X86II::VEXShift) & X86II::XOP_W)
+ XOP_W = 1;
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::XOP)
+ XOP = 1;
+
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
VEX_L = 1;
@@ -482,6 +512,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
case X86II::XD: // F2 0F
VEX_PP = 0x3;
break;
+ case X86II::XOP8:
+ VEX_5M = 0x8;
+ break;
+ case X86II::XOP9:
+ VEX_5M = 0x9;
+ break;
case X86II::A6: // Bypass: Not used by VEX
case X86II::A7: // Bypass: Not used by VEX
case X86II::TB: // Bypass: Not used by VEX
@@ -489,6 +525,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
break; // No prefix!
}
+
// Set the vector length to 256-bit if YMM0-YMM15 is used
for (unsigned i = 0; i != MI.getNumOperands(); ++i) {
if (!MI.getOperand(i).isReg())
@@ -529,6 +566,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// src1(ModR/M), MemAddr, imm8
// src1(ModR/M), MemAddr, src2(VEX_I8IMM)
//
+ // FMA4:
+ // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+ // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M),
if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
VEX_R = 0x0;
@@ -620,16 +660,16 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
//
unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
- if (VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) { // 2 byte VEX prefix
+ if (VEX_B && VEX_X && !VEX_W && !XOP && (VEX_5M == 1)) { // 2 byte VEX prefix
EmitByte(0xC5, CurByte, OS);
EmitByte(LastByte | (VEX_R << 7), CurByte, OS);
return;
}
// 3 byte VEX prefix
- EmitByte(0xC4, CurByte, OS);
+ EmitByte(XOP ? 0x8F : 0xC4, CurByte, OS);
EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS);
- EmitByte(LastByte | (VEX_W << 7), CurByte, OS);
+ EmitByte(LastByte | ((VEX_W | XOP_W) << 7), CurByte, OS);
}
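
To keep the byte layout straight, here is a small sketch that assembles the two-byte VEX, three-byte VEX, and XOP prefixes from the same fields the emitter uses above. It assumes, as the emitter does, that R/X/B already hold the final (complemented) bit values, and it folds VEX_W and XOP_W into a single W argument:

#include <cstdint>
#include <vector>

static std::vector<uint8_t> buildVexOrXopPrefix(bool IsXOP, uint8_t R, uint8_t X,
                                                uint8_t B, uint8_t W, uint8_t M5,
                                                uint8_t VVVV, uint8_t L, uint8_t PP) {
  uint8_t LastByte = PP | (L << 2) | (VVVV << 3);
  // The short 0xC5 form only exists for the plain 0F map with R/X/B set
  // (i.e. no extended registers), W clear, and never for XOP.
  if (B && X && !W && !IsXOP && M5 == 1)
    return { 0xC5, uint8_t(LastByte | (R << 7)) };
  // Otherwise emit the three-byte form; XOP swaps the 0xC4 escape for 0x8F.
  return { uint8_t(IsXOP ? 0x8F : 0xC4),
           uint8_t((R << 7) | (X << 6) | (B << 5) | M5),
           uint8_t(LastByte | (W << 7)) };
}

// e.g. a 128-bit op on low registers in the 0F map with vvvv unused takes the
// short form: buildVexOrXopPrefix(false, 1, 1, 1, 0, 1, 0xF, 0, 0) == {0xC5, 0xF8}.
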
/// DetermineREXPrefix - Determine if the MCInst has to be encoded with a X86-64
@@ -889,6 +929,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// It uses the VEX.VVVV field?
bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+ bool HasXOP_W = (TSFlags >> X86II::VEXShift) & X86II::XOP_W;
+ unsigned XOP_W_I8IMMOperand = 2;
// Determine where the memory operand starts, if present.
int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode);
@@ -961,9 +1003,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
SrcRegNum++;
+ if (HasXOP_W) // Skip 2nd src (which is encoded in I8IMM)
+ SrcRegNum++;
+
EmitRegModRMByte(MI.getOperand(SrcRegNum),
GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
- CurOp = SrcRegNum + 1;
+
+ // 2 operands skipped with HasXOP_W, compensate accordingly
+ CurOp = HasXOP_W ? SrcRegNum : SrcRegNum + 1;
if (HasVEX_4VOp3)
++CurOp;
break;
@@ -975,6 +1022,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
++AddrOperands;
++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
}
+ if (HasXOP_W) // Skip second register source (encoded in I8IMM)
+ ++FirstMemOp;
EmitByte(BaseOpcode, CurByte, OS);
@@ -1062,12 +1111,24 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// according to the right size for the instruction.
if (CurOp != NumOps) {
// The last source register of a 4 operand instruction in AVX is encoded
- // in bits[7:4] of a immediate byte, and bits[3:0] are ignored.
+ // in bits[7:4] of an immediate byte.
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
- const MCOperand &MO = MI.getOperand(CurOp++);
+ const MCOperand &MO = MI.getOperand(HasXOP_W ? XOP_W_I8IMMOperand
+ : CurOp);
+ CurOp++;
bool IsExtReg = X86II::isX86_64ExtendedReg(MO.getReg());
unsigned RegNum = (IsExtReg ? (1 << 7) : 0);
RegNum |= GetX86RegNum(MO) << 4;
+ // If there is an additional 5th operand it must be an immediate, which
+ // is encoded in bits[3:0].
+ if (CurOp != NumOps) {
+ const MCOperand &MIMM = MI.getOperand(CurOp++);
+ if (MIMM.isImm()) {
+ unsigned Val = MIMM.getImm();
+ assert(Val < 16 && "Immediate operand value out of range");
+ RegNum |= Val;
+ }
+ }
EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS,
Fixups);
} else {
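
The VEX_I8IMM handling above now packs two things into one byte: the extra register source in bits [7:4] (bit 7 doubling as the x86-64 extended-register bit) and, for the XOP/FMA4 forms that carry a trailing immediate, that immediate in bits [3:0]. A minimal sketch of the packing:

#include <cassert>
#include <cstdint>

static uint8_t packI8Imm(unsigned RegNumLow3, bool IsExtReg, unsigned Imm4) {
  assert(RegNumLow3 < 8 && Imm4 < 16 && "fields out of range");
  uint8_t Byte = IsExtReg ? (1u << 7) : 0;  // extension bit for XMM8-XMM15
  Byte |= RegNumLow3 << 4;                  // register number in bits [6:4]
  Byte |= Imm4;                             // optional immediate in bits [3:0]
  return Byte;
}

// e.g. XMM12 (extended, low bits 100b) with an immediate of 2 packs to 0xC2.
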
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index a843515..f2a34ed 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -361,8 +361,10 @@ static MCAsmInfo *createX86MCAsmInfo(const Target &T, StringRef TT) {
MAI = new X86_64MCAsmInfoDarwin(TheTriple);
else
MAI = new X86MCAsmInfoDarwin(TheTriple);
- } else if (TheTriple.isOSWindows()) {
- MAI = new X86MCAsmInfoCOFF(TheTriple);
+ } else if (TheTriple.getOS() == Triple::Win32) {
+ MAI = new X86MCAsmInfoMicrosoft(TheTriple);
+ } else if (TheTriple.getOS() == Triple::MinGW32 || TheTriple.getOS() == Triple::Cygwin) {
+ MAI = new X86MCAsmInfoGNUCOFF(TheTriple);
} else {
MAI = new X86ELFMCAsmInfo(TheTriple);
}
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index 7d901af..a581993 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -922,16 +922,3 @@ _test2: ## @test2
The insertps's of $0 are pointless complex copies.
//===---------------------------------------------------------------------===//
-
-If SSE4.1 is available we should inline rounding functions instead of emitting
-a libcall.
-
-floor: roundsd $0x01, %xmm, %xmm
-ceil: roundsd $0x02, %xmm, %xmm
-
-and likewise for the single precision versions.
-
-Currently, SelectionDAGBuilder doesn't turn calls to these functions into the
-corresponding nodes and some targets (including X86) aren't ready for them.
-
-//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/TargetInfo/CMakeLists.txt b/lib/Target/X86/TargetInfo/CMakeLists.txt
index 4da00fa..b1d0b9f 100644
--- a/lib/Target/X86/TargetInfo/CMakeLists.txt
+++ b/lib/Target/X86/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMX86Info
X86TargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMX86Info
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMX86Info X86CommonTableGen)
diff --git a/lib/Target/X86/TargetInfo/LLVMBuild.txt b/lib/Target/X86/TargetInfo/LLVMBuild.txt
index ee015bd..3c64a22 100644
--- a/lib/Target/X86/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/X86/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = X86Info
parent = X86
required_libraries = MC Support Target
add_to_library_groups = X86
-
diff --git a/lib/Target/X86/Utils/CMakeLists.txt b/lib/Target/X86/Utils/CMakeLists.txt
index caffd8b..2e72c34 100644
--- a/lib/Target/X86/Utils/CMakeLists.txt
+++ b/lib/Target/X86/Utils/CMakeLists.txt
@@ -4,9 +4,4 @@ add_llvm_library(LLVMX86Utils
X86ShuffleDecode.cpp
)
-add_llvm_library_dependencies(LLVMX86Utils
- LLVMCore
- LLVMSupport
- )
-
add_dependencies(LLVMX86Utils X86CommonTableGen)
diff --git a/lib/Target/X86/Utils/LLVMBuild.txt b/lib/Target/X86/Utils/LLVMBuild.txt
index 3ee441e..de0a30f 100644
--- a/lib/Target/X86/Utils/LLVMBuild.txt
+++ b/lib/Target/X86/Utils/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = X86Utils
parent = X86
required_libraries = Core Support
add_to_library_groups = X86
-
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index f6c9d7b..e7631b6 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -95,54 +95,31 @@ void DecodePSHUFLWMask(unsigned Imm,
ShuffleMask.push_back(7);
}
-void DecodePUNPCKLBWMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i8, NElts), ShuffleMask);
-}
-
-void DecodePUNPCKLWDMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i16, NElts), ShuffleMask);
-}
-
-void DecodePUNPCKLDQMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
-}
-
-void DecodePUNPCKLQDQMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
-}
-
-void DecodePUNPCKLMask(EVT VT,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(VT, ShuffleMask);
-}
+void DecodeSHUFPMask(EVT VT, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
-void DecodePUNPCKHMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(i+NElts/2);
- ShuffleMask.push_back(i+NElts+NElts/2);
- }
-}
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
-void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- // Part that reads from dest.
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(Imm % NElts);
- Imm /= NElts;
- }
- // Part that reads from src.
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(Imm % NElts + NElts);
- Imm /= NElts;
+ int NewImm = Imm;
+ for (unsigned l = 0; l < NumLanes; ++l) {
+ unsigned LaneStart = l * NumLaneElts;
+ // Part that reads from dest.
+ for (unsigned i = 0; i != NumLaneElts/2; ++i) {
+ ShuffleMask.push_back(NewImm % NumLaneElts + LaneStart);
+ NewImm /= NumLaneElts;
+ }
+ // Part that reads from src.
+ for (unsigned i = 0; i != NumLaneElts/2; ++i) {
+ ShuffleMask.push_back(NewImm % NumLaneElts + NumElts + LaneStart);
+ NewImm /= NumLaneElts;
+ }
+ if (NumLaneElts == 4) NewImm = Imm; // reload imm
}
}
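
DecodeSHUFPMask now walks 128-bit lanes, so one routine covers SHUFPS, SHUFPD and their 256-bit AVX forms. A standalone sketch with plain element/lane counts in place of EVT, plus a worked immediate:

#include <cstdio>
#include <vector>

static std::vector<unsigned> decodeSHUFP(unsigned NumElts, unsigned NumLanes,
                                         unsigned Imm) {
  std::vector<unsigned> Mask;
  unsigned NumLaneElts = NumElts / NumLanes;
  unsigned NewImm = Imm;
  for (unsigned l = 0; l != NumLanes; ++l) {
    unsigned LaneStart = l * NumLaneElts;
    for (unsigned i = 0; i != NumLaneElts / 2; ++i) {  // half read from dest
      Mask.push_back(NewImm % NumLaneElts + LaneStart);
      NewImm /= NumLaneElts;
    }
    for (unsigned i = 0; i != NumLaneElts / 2; ++i) {  // half read from src
      Mask.push_back(NewImm % NumLaneElts + NumElts + LaneStart);
      NewImm /= NumLaneElts;
    }
    if (NumLaneElts == 4) NewImm = Imm;  // ps forms reuse the 8-bit imm per lane
  }
  return Mask;
}

int main() {
  // shufps $0x1B (v4f32): <3,2,5,4>
  // vshufps $0x1B (v8f32): <3,2,9,8,7,6,13,12>, printed below.
  for (unsigned Idx : decodeSHUFP(8, 2, 0x1B))
    std::printf("%u ", Idx);
  std::printf("\n");
  return 0;
}
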
-void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -161,10 +138,10 @@ void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
}
}
-/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
/// etc. VT indicates the type of the vector allowing it to handle different
/// datatypes and vector widths.
-void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -183,36 +160,23 @@ void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
}
}
-// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit
-// elements. For 256-bit vectors, it's considered as two 128 lanes, the
-// referenced elements can't cross lanes and the mask of the first lane must
-// be the same of the second.
-void DecodeVPERMILPSMask(unsigned NumElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- unsigned NumLanes = (NumElts*32)/128;
- unsigned LaneSize = NumElts/NumLanes;
-
- for (unsigned l = 0; l != NumLanes; ++l) {
- for (unsigned i = 0; i != LaneSize; ++i) {
- unsigned Idx = (Imm >> (i*2)) & 0x3 ;
- ShuffleMask.push_back(Idx+(l*LaneSize));
- }
- }
-}
+// DecodeVPERMILPMask - Decodes VPERMILPS/VPERMILPD permutes for any 128-bit
+// 32-bit or 64-bit elements. For 256-bit vectors, the shuffle is treated as
+// two 128-bit lanes. For VPERMILPS, referenced elements can't cross lanes and
+// the mask of the first lane must be the same as that of the second.
+void DecodeVPERMILPMask(EVT VT, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
-// DecodeVPERMILPDMask - Decodes VPERMILPD permutes for any 128-bit 64-bit
-// elements. For 256-bit vectors, it's considered as two 128 lanes, the
-// referenced elements can't cross lanes but the mask of the first lane can
-// be the different of the second (not like VPERMILPS).
-void DecodeVPERMILPDMask(unsigned NumElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- unsigned NumLanes = (NumElts*64)/128;
- unsigned LaneSize = NumElts/NumLanes;
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
- for (unsigned l = 0; l < NumLanes; ++l) {
- for (unsigned i = l*LaneSize; i < LaneSize*(l+1); ++i) {
- unsigned Idx = (Imm >> i) & 0x1;
- ShuffleMask.push_back(Idx+(l*LaneSize));
+ for (unsigned l = 0; l != NumLanes; ++l) {
+ unsigned LaneStart = l*NumLaneElts;
+ for (unsigned i = 0; i != NumLaneElts; ++i) {
+ unsigned Idx = NumLaneElts == 4 ? (Imm >> (i*2)) & 0x3
+ : (Imm >> (i+LaneStart)) & 0x1;
+ ShuffleMask.push_back(Idx+LaneStart);
}
}
}
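
The unified VPERMILP decode keys off the lane width: ps forms use 2-bit selectors repeated per lane, pd forms use one bit per element indexed from the lane's position in the immediate. A standalone sketch mirroring the new helper, with element/lane counts passed directly instead of EVT:

#include <cstdio>
#include <vector>

static std::vector<unsigned> decodeVPERMILP(unsigned NumElts, unsigned NumLanes,
                                            unsigned Imm) {
  std::vector<unsigned> Mask;
  unsigned NumLaneElts = NumElts / NumLanes;
  for (unsigned l = 0; l != NumLanes; ++l) {
    unsigned LaneStart = l * NumLaneElts;
    for (unsigned i = 0; i != NumLaneElts; ++i) {
      unsigned Idx = NumLaneElts == 4 ? (Imm >> (i * 2)) & 0x3   // ps: 2 bits
                                      : (Imm >> (i + LaneStart)) & 0x1;  // pd
      Mask.push_back(Idx + LaneStart);
    }
  }
  return Mask;
}

int main() {
  // vpermilps $0x1B, %ymm: <3,2,1,0,7,6,5,4> (printed below)
  // vpermilpd $0x5, %ymm:  <1,0,3,2>
  for (unsigned Idx : decodeVPERMILP(8, 2, 0x1B))
    std::printf("%u ", Idx);
  std::printf("\n");
  return 0;
}
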
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 35f6530..243728f 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -46,50 +46,25 @@ void DecodePSHUFHWMask(unsigned Imm,
void DecodePSHUFLWMask(unsigned Imm,
SmallVectorImpl<unsigned> &ShuffleMask);
-void DecodePUNPCKLBWMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodePUNPCKLWDMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodePUNPCKLDQMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodePUNPCKLQDQMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodePUNPCKLMask(EVT VT,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodePUNPCKHMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeSHUFPMask(EVT VT, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
-/// DecodeUNPCKHPMask - This decodes the shuffle masks for unpckhps/unpckhpd
+/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd
/// etc. VT indicates the type of the vector allowing it to handle different
/// datatypes and vector widths.
-void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
-/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
/// etc. VT indicates the type of the vector allowing it to handle different
/// datatypes and vector widths.
-void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
-
+void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
-// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit
-// elements. For 256-bit vectors, it's considered as two 128 lanes, the
-// referenced elements can't cross lanes and the mask of the first lane must
-// be the same of the second.
-void DecodeVPERMILPSMask(unsigned NElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
-// DecodeVPERMILPDMask - Decodes VPERMILPD permutes for any 128-bit 64-bit
-// elements. For 256-bit vectors, it's considered as two 128 lanes, the
-// referenced elements can't cross lanes but the mask of the first lane can
-// be the different of the second (not like VPERMILPS).
-void DecodeVPERMILPDMask(unsigned NElts, unsigned Imm,
+// DecodeVPERMILPMask - Decodes VPERMILPS/VPERMILPD permutes for any 128-bit
+// 32-bit or 64-bit elements. For 256-bit vectors, the shuffle is treated as
+// two 128-bit lanes. For VPERMILPS, referenced elements can't cross lanes and
+// the mask of the first lane must be the same as that of the second.
+void DecodeVPERMILPMask(EVT VT, unsigned Imm,
SmallVectorImpl<unsigned> &ShuffleMask);
void DecodeVPERM2F128Mask(unsigned Imm,
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 62a7016..8229ca5 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -91,6 +91,8 @@ def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true",
"Enable three-operand fused multiple-add">;
def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
"Enable four-operand fused multiple-add">;
+def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
+ "Enable XOP instructions">;
def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
"HasVectorUAMem", "true",
"Allow unaligned memory operands on vector/SIMD instructions">;
@@ -194,14 +196,16 @@ def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
def : Proc<"amdfam10", [FeatureSSE3, FeatureSSE4A,
- Feature3DNowA, FeatureCMPXCHG16B,
+ Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT,
FeatureSlowBTMem]>;
-def : Proc<"barcelona", [FeatureSSE3, FeatureSSE4A,
- Feature3DNowA, FeatureCMPXCHG16B,
- FeatureSlowBTMem]>;
-def : Proc<"istanbul", [Feature3DNowA, FeatureCMPXCHG16B,
- FeatureSSE4A]>;
-def : Proc<"shanghai", [Feature3DNowA, FeatureCMPXCHG16B, FeatureSSE4A]>;
+// FIXME: Disabling AVX for now since it's not ready.
+def : Proc<"bdver1", [FeatureSSE42, FeatureSSE4A, FeatureCMPXCHG16B,
+ FeatureAES, FeatureCLMUL, FeatureFMA4,
+ FeatureXOP, FeatureLZCNT]>;
+def : Proc<"bdver2", [FeatureSSE42, FeatureSSE4A, FeatureCMPXCHG16B,
+ FeatureAES, FeatureCLMUL, FeatureFMA4,
+ FeatureXOP, FeatureF16C, FeatureLZCNT,
+ FeatureBMI]>;
def : Proc<"winchip-c6", [FeatureMMX]>;
def : Proc<"winchip2", [Feature3DNow]>;
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 77b9905..aab2a05 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -158,10 +158,15 @@ def CC_X86_64_C : CallingConv<[
CCIfSubtarget<"hasXMM()",
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
- // The first 8 256-bit vector arguments are passed in YMM registers.
- CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
- CCIfSubtarget<"hasAVX()",
- CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7]>>>,
+ // The first 8 256-bit vector arguments are passed in YMM registers, unless
+ // this is a vararg function.
+ // FIXME: This isn't precisely correct; the x86-64 ABI document says that
+ // fixed arguments to vararg functions are supposed to be passed in
+ // registers. Actually modeling that would be a lot of work, though.
+ CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasAVX()",
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3,
+ YMM4, YMM5, YMM6, YMM7]>>>>,
// Integer/FP values get stored in stack slots that are 8 bytes in size and
// 8-byte aligned if there are no more registers to hold them.
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index ba615a8..ed16e88 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -1004,7 +1004,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
break;
}
- if (!Desc->isVariadic() && CurOp != NumOps) {
+ if (!MI.isVariadic() && CurOp != NumOps) {
#ifndef NDEBUG
dbgs() << "Cannot encode all operands of: " << MI << "\n";
#endif
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 32f1770..1589439 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -728,7 +728,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
// fastcc with -tailcallopt is intended to provide a guaranteed
// tail call optimization. Fastisel doesn't know how to do that.
- if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
+ if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
return false;
// Let SDISel handle vararg functions.
@@ -1529,7 +1529,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
// fastcc with -tailcallopt is intended to provide a guaranteed
// tail call optimization. Fastisel doesn't know how to do that.
- if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
+ if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
return false;
PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
@@ -1543,7 +1543,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
// Fast-isel doesn't know about callee-pop yet.
if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg,
- GuaranteedTailCallOpt))
+ TM.Options.GuaranteedTailCallOpt))
return false;
// Check whether the function can return without sret-demotion.
@@ -2121,7 +2121,7 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
default: return false;
case MVT::f32:
if (X86ScalarSSEf32) {
- Opc = Subtarget->hasAVX() ? X86::VFsFLD0SS : X86::FsFLD0SS;
+ Opc = X86::FsFLD0SS;
RC = X86::FR32RegisterClass;
} else {
Opc = X86::LD_Fp032;
@@ -2130,7 +2130,7 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
break;
case MVT::f64:
if (X86ScalarSSEf64) {
- Opc = Subtarget->hasAVX() ? X86::VFsFLD0SD : X86::FsFLD0SD;
+ Opc = X86::FsFLD0SD;
RC = X86::FR64RegisterClass;
} else {
Opc = X86::LD_Fp064;
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 819d242..6a40cc1 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -47,7 +47,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineModuleInfo &MMI = MF.getMMI();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
- return (DisableFramePointerElim(MF) ||
+ return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
RI->needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken() ||
@@ -210,7 +210,7 @@ static
void mergeSPUpdatesDown(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
unsigned StackPtr, uint64_t *NumBytes = NULL) {
- // FIXME: THIS ISN'T RUN!!!
+ // FIXME: THIS ISN'T RUN!!!
return;
if (MBBI == MBB.end()) return;
@@ -351,20 +351,22 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
/// register. The number corresponds to the enum lists in
/// compact_unwind_encoding.h.
static int getCompactUnwindRegNum(const unsigned *CURegs, unsigned Reg) {
- int Idx = 1;
- for (; *CURegs; ++CURegs, ++Idx)
+ for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
if (*CURegs == Reg)
return Idx;
return -1;
}
+// Number of registers that can be saved in a compact unwind encoding.
+#define CU_NUM_SAVED_REGS 6
+
/// encodeCompactUnwindRegistersWithoutFrame - Create the permutation encoding
/// used with frameless stacks. It is passed the number of registers to be saved
/// and an array of the registers saved.
-static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6],
- unsigned RegCount,
- bool Is64Bit) {
+static uint32_t
+encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
+ unsigned RegCount, bool Is64Bit) {
// The saved registers are numbered from 1 to 6. In order to encode the order
// in which they were saved, we re-number them according to their place in the
// register order. The re-numbering is relative to the last re-numbered
@@ -385,14 +387,21 @@ static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6],
};
const unsigned *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs);
- uint32_t RenumRegs[6];
- for (unsigned i = 6 - RegCount; i < 6; ++i) {
+ for (unsigned i = 0; i != CU_NUM_SAVED_REGS; ++i) {
int CUReg = getCompactUnwindRegNum(CURegs, SavedRegs[i]);
if (CUReg == -1) return ~0U;
SavedRegs[i] = CUReg;
+ }
+
+ // Reverse the list.
+ std::swap(SavedRegs[0], SavedRegs[5]);
+ std::swap(SavedRegs[1], SavedRegs[4]);
+ std::swap(SavedRegs[2], SavedRegs[3]);
+ uint32_t RenumRegs[CU_NUM_SAVED_REGS];
+ for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i) {
unsigned Countless = 0;
- for (unsigned j = 6 - RegCount; j < i; ++j)
+ for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
if (SavedRegs[j] < SavedRegs[i])
++Countless;
@@ -435,8 +444,9 @@ static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6],
/// encodeCompactUnwindRegistersWithFrame - Return the registers encoded for a
/// compact encoding with a frame pointer.
-static uint32_t encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[6],
- bool Is64Bit) {
+static uint32_t
+encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
+ bool Is64Bit) {
static const unsigned CU32BitRegs[] = {
X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
};
@@ -448,13 +458,16 @@ static uint32_t encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[6],
// Encode the registers in the order they were saved, 3-bits per register. The
// registers are numbered from 1 to 6.
uint32_t RegEnc = 0;
- for (int I = 5; I >= 0; --I) {
+ for (int I = 0; I != 6; ++I) {
unsigned Reg = SavedRegs[I];
if (Reg == 0) break;
int CURegNum = getCompactUnwindRegNum(CURegs, Reg);
if (CURegNum == -1)
return ~0U;
- RegEnc |= (CURegNum & 0x7) << (5 - I);
+
+ // Encode the 3-bit register number in order, skipping over 3-bits for each
+ // register.
+ RegEnc |= (CURegNum & 0x7) << ((5 - I) * 3);
}
assert((RegEnc & 0x7FFF) == RegEnc && "Invalid compact register encoding!");
@@ -466,14 +479,11 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
unsigned FramePtr = RegInfo->getFrameRegister(MF);
unsigned StackPtr = RegInfo->getStackRegister();
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
-
bool Is64Bit = STI.is64Bit();
bool HasFP = hasFP(MF);
- unsigned SavedRegs[6] = { 0, 0, 0, 0, 0, 0 };
- int SavedRegIdx = 6;
+ unsigned SavedRegs[CU_NUM_SAVED_REGS] = { 0, 0, 0, 0, 0, 0 };
+ unsigned SavedRegIdx = 0;
unsigned OffsetSize = (Is64Bit ? 8 : 4);
@@ -481,14 +491,13 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
unsigned PushInstrSize = 1;
unsigned MoveInstr = (Is64Bit ? X86::MOV64rr : X86::MOV32rr);
unsigned MoveInstrSize = (Is64Bit ? 3 : 2);
- unsigned SubtractInstr = getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta);
unsigned SubtractInstrIdx = (Is64Bit ? 3 : 2);
unsigned StackDivide = (Is64Bit ? 8 : 4);
unsigned InstrOffset = 0;
- unsigned CFAOffset = 0;
unsigned StackAdjust = 0;
+ unsigned StackSize = 0;
MachineBasicBlock &MBB = MF.front(); // Prologue is in entry BB.
bool ExpectEnd = false;
@@ -504,10 +513,10 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
if (Opc == PushInstr) {
// If there are too many saved registers, we cannot use compact encoding.
- if (--SavedRegIdx < 0) return 0;
+ if (SavedRegIdx >= CU_NUM_SAVED_REGS) return 0;
- SavedRegs[SavedRegIdx] = MI.getOperand(0).getReg();
- CFAOffset += OffsetSize;
+ SavedRegs[SavedRegIdx++] = MI.getOperand(0).getReg();
+ StackAdjust += OffsetSize;
InstrOffset += PushInstrSize;
} else if (Opc == MoveInstr) {
unsigned SrcReg = MI.getOperand(1).getReg();
@@ -516,13 +525,14 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
if (DstReg != FramePtr || SrcReg != StackPtr)
return 0;
- CFAOffset = 0;
+ StackAdjust = 0;
memset(SavedRegs, 0, sizeof(SavedRegs));
- SavedRegIdx = 6;
+ SavedRegIdx = 0;
InstrOffset += MoveInstrSize;
- } else if (Opc == SubtractInstr) {
- if (StackAdjust)
- // We all ready have a stack pointer adjustment.
+ } else if (Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) {
+ if (StackSize)
+ // We already have a stack size.
return 0;
if (!MI.getOperand(0).isReg() ||
@@ -533,7 +543,7 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
// %RSP<def> = SUB64ri8 %RSP, 48
return 0;
- StackAdjust = MI.getOperand(2).getImm() / StackDivide;
+ StackSize = MI.getOperand(2).getImm() / StackDivide;
SubtractInstrIdx += InstrOffset;
ExpectEnd = true;
}
@@ -541,28 +551,30 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
// Encode that we are using EBP/RBP as the frame pointer.
uint32_t CompactUnwindEncoding = 0;
- CFAOffset /= StackDivide;
+ StackAdjust /= StackDivide;
if (HasFP) {
- if ((CFAOffset & 0xFF) != CFAOffset)
+ if ((StackAdjust & 0xFF) != StackAdjust)
// Offset was too big for compact encoding.
return 0;
// Get the encoding of the saved registers when we have a frame pointer.
uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(SavedRegs, Is64Bit);
- if (RegEnc == ~0U)
- return 0;
+ if (RegEnc == ~0U) return 0;
CompactUnwindEncoding |= 0x01000000;
- CompactUnwindEncoding |= (CFAOffset & 0xFF) << 16;
+ CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
CompactUnwindEncoding |= RegEnc & 0x7FFF;
} else {
- unsigned FullOffset = CFAOffset + StackAdjust;
- if ((FullOffset & 0xFF) == FullOffset) {
- // Frameless stack.
+ ++StackAdjust;
+ uint32_t TotalStackSize = StackAdjust + StackSize;
+ if ((TotalStackSize & 0xFF) == TotalStackSize) {
+ // Frameless stack with a small stack size.
CompactUnwindEncoding |= 0x02000000;
- CompactUnwindEncoding |= (FullOffset & 0xFF) << 16;
+
+ // Encode the stack size.
+ CompactUnwindEncoding |= (TotalStackSize & 0xFF) << 16;
} else {
- if ((CFAOffset & 0x7) != CFAOffset)
+ if ((StackAdjust & 0x7) != StackAdjust)
// The extra stack adjustments are too big for us to handle.
return 0;
@@ -573,16 +585,21 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
// instruction.
CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
- // Encode any extra stack stack changes (done via push instructions).
- CompactUnwindEncoding |= (CFAOffset & 0x7) << 13;
+ // Encode any extra stack adjustments (done via push instructions).
+ CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
}
+ // Encode the number of registers saved.
+ CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
+
// Get the encoding of the saved registers when we don't have a frame
// pointer.
- uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegs,
- 6 - SavedRegIdx,
- Is64Bit);
+ uint32_t RegEnc =
+ encodeCompactUnwindRegistersWithoutFrame(SavedRegs, SavedRegIdx,
+ Is64Bit);
if (RegEnc == ~0U) return 0;
+
+ // Add the saved-register encoding.
CompactUnwindEncoding |= RegEnc & 0x3FF;
}
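
For reference while reading the frameless branch above, a sketch assembling the 32-bit compact-unwind word for the small-stack frameless case from the fields the code sets; the shifts are exactly the ones used above:

#include <cstdint>

static uint32_t framelessEncoding(uint32_t StackSizeUnits, uint32_t NumSaved,
                                  uint32_t RegPermutation) {
  uint32_t Enc = 0x02000000;             // frameless mode, immediate stack size
  Enc |= (StackSizeUnits & 0xFF) << 16;  // stack size divided by StackDivide
  Enc |= (NumSaved & 0x7) << 10;         // number of pushed registers
  Enc |= RegPermutation & 0x3FF;         // permutation of the saved registers
  return Enc;
}

// e.g. framelessEncoding(6, 2, Perm) describes a 48-byte x86-64 frame that
// pushes two callee-saved registers (48 / 8 == 6 stack-size units).
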
@@ -638,10 +655,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// stack pointer (we fit in the Red Zone).
if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
!RegInfo->needsStackRealignment(MF) &&
- !MFI->hasVarSizedObjects() && // No dynamic alloca.
- !MFI->adjustsStack() && // No calls.
- !IsWin64 && // Win64 has no Red Zone
- !EnableSegmentedStacks) { // Regular stack
+ !MFI->hasVarSizedObjects() && // No dynamic alloca.
+ !MFI->adjustsStack() && // No calls.
+ !IsWin64 && // Win64 has no Red Zone
+ !MF.getTarget().Options.EnableSegmentedStacks) { // Regular stack
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
if (HasFP) MinSize += SlotSize;
StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
@@ -978,7 +995,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
unsigned Opc = PI->getOpcode();
if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
- !PI->getDesc().isTerminator())
+ !PI->isTerminator())
break;
--MBBI;
@@ -1306,6 +1323,10 @@ GetScratchRegister(bool Is64Bit, const MachineFunction &MF) {
}
}
+// The stack limit in the TCB is set to this many bytes above the actual stack
+// limit.
+static const uint64_t kSplitStackAvailable = 256;
+
void
X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
MachineBasicBlock &prologueMBB = MF.front();
@@ -1360,16 +1381,24 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
TlsReg = X86::FS;
TlsOffset = 0x70;
- BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP)
- .addImm(0).addReg(0).addImm(-StackSize).addReg(0);
+ if (StackSize < kSplitStackAvailable)
+ ScratchReg = X86::RSP;
+ else
+ BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP)
+ .addImm(0).addReg(0).addImm(-StackSize).addReg(0);
+
BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg)
.addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
} else {
TlsReg = X86::GS;
TlsOffset = 0x30;
- BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
- .addImm(0).addReg(0).addImm(-StackSize).addReg(0);
+ if (StackSize < kSplitStackAvailable)
+ ScratchReg = X86::ESP;
+ else
+ BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
+ .addImm(0).addReg(0).addImm(-StackSize).addReg(0);
+
BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
.addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
}
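
A sketch of the stack-limit test emitted above: the stack limit stored in the TCB is biased kSplitStackAvailable (256) bytes above the true limit, so frames smaller than that can compare %rsp/%esp directly and skip the LEA that would otherwise materialize SP - StackSize. The compare-and-branch that consumes ScratchReg is outside this hunk, so the branch sense here is an assumption:

#include <cstdint>

static const uint64_t kSplitStackAvailable = 256;

// Returns true when the prologue should call __morestack (modeling the
// not-shown conditional branch as an unsigned "below" comparison).
static bool needMoreStack(uint64_t SP, uint64_t TlsStackLimit,
                          uint64_t StackSize) {
  uint64_t Scratch = (StackSize < kSplitStackAvailable)
                         ? SP               // small frame: SP alone suffices
                         : SP - StackSize;  // large frame: LEA SP - StackSize
  return Scratch < TlsStackLimit;           // CMP ScratchReg, %fs/%gs:TlsOffset
}
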
@@ -1394,9 +1423,6 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
MF.getRegInfo().setPhysRegUsed(X86::R10);
MF.getRegInfo().setPhysRegUsed(X86::R11);
} else {
- // Since we'll call __morestack, stack alignment needs to be preserved.
- BuildMI(allocMBB, DL, TII.get(X86::SUB32ri), X86::ESP).addReg(X86::ESP)
- .addImm(8);
BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
.addImm(X86FI->getArgumentStackSize());
BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
@@ -1411,11 +1437,6 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
.addExternalSymbol("__morestack");
- // __morestack only seems to remove 8 bytes off the stack. Add back the
- // additional 8 bytes we added before pushing the arguments.
- if (!Is64Bit)
- BuildMI(allocMBB, DL, TII.get(X86::ADD32ri), X86::ESP).addReg(X86::ESP)
- .addImm(8);
if (IsNested)
BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
else
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 96c6f41..03727a2 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -256,7 +256,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->is64Bit()) {
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
- } else if (!UseSoftFloat) {
+ } else if (!TM.Options.UseSoftFloat) {
// We have an algorithm for SSE2->double, and we turn this into a
// 64-bit FILD followed by conditional FADD for other targets.
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
@@ -270,7 +270,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
- if (!UseSoftFloat) {
+ if (!TM.Options.UseSoftFloat) {
// SSE has no i16 to fp conversion, only i32
if (X86ScalarSSEf32) {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
@@ -313,7 +313,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->is64Bit()) {
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
- } else if (!UseSoftFloat) {
+ } else if (!TM.Options.UseSoftFloat) {
// Since AVX is a superset of SSE3, only check for SSE here.
if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
// Expand FP_TO_UINT into a select.
@@ -378,6 +378,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FREM , MVT::f80 , Expand);
setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i8 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i16 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i64 , Expand);
if (Subtarget->hasBMI()) {
setOperationAction(ISD::CTTZ , MVT::i8 , Promote);
} else {
@@ -388,6 +392,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
}
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i8 , Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i16 , Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i64 , Expand);
if (Subtarget->hasLZCNT()) {
setOperationAction(ISD::CTLZ , MVT::i8 , Promote);
} else {
@@ -537,14 +545,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Custom);
- else if (EnableSegmentedStacks)
+ else if (TM.Options.EnableSegmentedStacks)
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Custom);
else
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Expand);
- if (!UseSoftFloat && X86ScalarSSEf64) {
+ if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) {
// f32 and f64 use SSE.
// Set up the FP register classes.
addRegisterClass(MVT::f32, X86::FR32RegisterClass);
@@ -576,7 +584,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// cases we handle.
addLegalFPImmediate(APFloat(+0.0)); // xorpd
addLegalFPImmediate(APFloat(+0.0f)); // xorps
- } else if (!UseSoftFloat && X86ScalarSSEf32) {
+ } else if (!TM.Options.UseSoftFloat && X86ScalarSSEf32) {
// Use SSE for f32, x87 for f64.
// Set up the FP register classes.
addRegisterClass(MVT::f32, X86::FR32RegisterClass);
@@ -605,11 +613,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
- if (!UnsafeFPMath) {
+ if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f64 , Expand);
setOperationAction(ISD::FCOS , MVT::f64 , Expand);
}
- } else if (!UseSoftFloat) {
+ } else if (!TM.Options.UseSoftFloat) {
// f32 and f64 in x87.
// Set up the FP register classes.
addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
@@ -620,7 +628,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
- if (!UnsafeFPMath) {
+ if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f64 , Expand);
setOperationAction(ISD::FCOS , MVT::f64 , Expand);
}
@@ -639,7 +647,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FMA, MVT::f32, Expand);
// Long double always uses X87.
- if (!UseSoftFloat) {
+ if (!TM.Options.UseSoftFloat) {
addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
@@ -658,11 +666,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
}
- if (!UnsafeFPMath) {
+ if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f80 , Expand);
setOperationAction(ISD::FCOS , MVT::f80 , Expand);
}
+ setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f80, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
+ setOperationAction(ISD::FRINT, MVT::f80, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
setOperationAction(ISD::FMA, MVT::f80, Expand);
}
@@ -714,7 +727,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FPOW, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CTPOP, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CTTZ, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CTLZ, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SHL, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SRA, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SRL, (MVT::SimpleValueType)VT, Expand);
@@ -748,7 +763,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
// with -msoft-float, disable use of MMX as well.
- if (!UseSoftFloat && Subtarget->hasMMX()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasMMX()) {
addRegisterClass(MVT::x86mmx, X86::VR64RegisterClass);
// No operations on x86mmx supported, everything uses intrinsics.
}
@@ -785,7 +800,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::BITCAST, MVT::v2i32, Expand);
setOperationAction(ISD::BITCAST, MVT::v1i64, Expand);
- if (!UseSoftFloat && Subtarget->hasXMM()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasXMM()) {
addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
setOperationAction(ISD::FADD, MVT::v4f32, Legal);
@@ -802,7 +817,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
}
- if (!UseSoftFloat && Subtarget->hasXMMInt()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasXMMInt()) {
addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
// FIXME: Unfortunately -soft-float and -no-implicit-float means XMM
@@ -983,7 +998,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->hasSSE42orAVX())
setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
- if (!UseSoftFloat && Subtarget->hasAVX()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasAVX()) {
addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);
addRegisterClass(MVT::v16i16, X86::VR256RegisterClass);
addRegisterClass(MVT::v8i32, X86::VR256RegisterClass);
@@ -1211,10 +1226,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
- setPrefLoopAlignment(16);
+ setPrefLoopAlignment(4); // 2^4 bytes.
benefitFromCodePlacementOpt = true;
- setPrefFunctionAlignment(4);
+ setPrefFunctionAlignment(4); // 2^4 bytes.
}
@@ -1709,7 +1724,8 @@ bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
/// FuncIsMadeTailCallSafe - Return true if the function is being made into
/// a tailcall target by changing its ABI.
-static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) {
+static bool FuncIsMadeTailCallSafe(CallingConv::ID CC,
+ bool GuaranteedTailCallOpt) {
return GuaranteedTailCallOpt && IsTailCallConvention(CC);
}
@@ -1723,7 +1739,8 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
unsigned i) const {
// Create the nodes corresponding to a load from this parameter slot.
ISD::ArgFlagsTy Flags = Ins[i].Flags;
- bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv);
+ bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv,
+ getTargetMachine().Options.GuaranteedTailCallOpt);
bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
EVT ValVT;
@@ -1873,7 +1890,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
unsigned StackSize = CCInfo.getNextStackOffset();
// Align stack specially for tail calls.
- if (FuncIsMadeTailCallSafe(CallConv))
+ if (FuncIsMadeTailCallSafe(CallConv,
+ MF.getTarget().Options.GuaranteedTailCallOpt))
StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
// If the function takes variable number of arguments, make a frame index for
@@ -1918,9 +1936,11 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
assert(!(NumXMMRegs && !Subtarget->hasXMM()) &&
"SSE register cannot be used when SSE is disabled!");
- assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) &&
+ assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat &&
+ NoImplicitFloatOps) &&
"SSE register cannot be used when SSE is disabled!");
- if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasXMM())
+ if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps ||
+ !Subtarget->hasXMM())
// Kernel mode asks for SSE to be disabled, so don't push them
// on the stack.
TotalNumXMMRegs = 0;
@@ -1998,7 +2018,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
}
// Some CCs need callee pop.
- if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt)) {
+ if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
+ MF.getTarget().Options.GuaranteedTailCallOpt)) {
FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
} else {
FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
@@ -2098,7 +2119,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Sibcalls are automatically detected tailcalls which do not require
// ABI changes.
- if (!GuaranteedTailCallOpt && isTailCall)
+ if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
IsSibcall = true;
if (isTailCall)
@@ -2126,7 +2147,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// This is a sibcall. The memory operands are available in caller's
// own caller's stack.
NumBytes = 0;
- else if (GuaranteedTailCallOpt && IsTailCallConvention(CallConv))
+ else if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ IsTailCallConvention(CallConv))
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
int FPDiff = 0;
@@ -2305,7 +2327,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
int FI = 0;
// Do not flag preceding copytoreg stuff together with the following stuff.
InFlag = SDValue();
- if (GuaranteedTailCallOpt) {
+ if (getTargetMachine().Options.GuaranteedTailCallOpt) {
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (VA.isRegLoc())
@@ -2485,7 +2507,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Create the CALLSEQ_END node.
unsigned NumBytesForCalleeToPush;
- if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt))
+ if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
+ getTargetMachine().Options.GuaranteedTailCallOpt))
NumBytesForCalleeToPush = NumBytes; // Callee pops everything
else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet)
// If this is a call to a struct-return function, the callee
@@ -2643,7 +2666,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
- if (GuaranteedTailCallOpt) {
+ if (getTargetMachine().Options.GuaranteedTailCallOpt) {
if (IsTailCallConvention(CalleeCC) && CCMatch)
return true;
return false;
@@ -2843,23 +2866,10 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::MOVDDUP:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
- case X86ISD::UNPCKLPS:
- case X86ISD::UNPCKLPD:
- case X86ISD::PUNPCKLWD:
- case X86ISD::PUNPCKLBW:
- case X86ISD::PUNPCKLDQ:
- case X86ISD::PUNPCKLQDQ:
- case X86ISD::UNPCKHPS:
- case X86ISD::UNPCKHPD:
- case X86ISD::PUNPCKHWD:
- case X86ISD::PUNPCKHBW:
- case X86ISD::PUNPCKHDQ:
- case X86ISD::PUNPCKHQDQ:
- case X86ISD::VPERMILPS:
- case X86ISD::VPERMILPSY:
- case X86ISD::VPERMILPD:
- case X86ISD::VPERMILPDY:
- case X86ISD::VPERM2F128:
+ case X86ISD::UNPCKL:
+ case X86ISD::UNPCKH:
+ case X86ISD::VPERMILP:
+ case X86ISD::VPERM2X128:
return true;
}
return false;
@@ -2885,10 +2895,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
- case X86ISD::VPERMILPS:
- case X86ISD::VPERMILPSY:
- case X86ISD::VPERMILPD:
- case X86ISD::VPERMILPDY:
+ case X86ISD::VPERMILP:
return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
}
@@ -2902,7 +2909,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::PALIGN:
case X86ISD::SHUFPD:
case X86ISD::SHUFPS:
- case X86ISD::VPERM2F128:
+ case X86ISD::VPERM2X128:
return DAG.getNode(Opc, dl, VT, V1, V2,
DAG.getConstant(TargetMask, MVT::i8));
}
@@ -2920,18 +2927,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::MOVLPD:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
- case X86ISD::UNPCKLPS:
- case X86ISD::UNPCKLPD:
- case X86ISD::PUNPCKLWD:
- case X86ISD::PUNPCKLBW:
- case X86ISD::PUNPCKLDQ:
- case X86ISD::PUNPCKLQDQ:
- case X86ISD::UNPCKHPS:
- case X86ISD::UNPCKHPD:
- case X86ISD::PUNPCKHWD:
- case X86ISD::PUNPCKHBW:
- case X86ISD::PUNPCKHDQ:
- case X86ISD::PUNPCKHQDQ:
+ case X86ISD::UNPCKL:
+ case X86ISD::UNPCKH:
return DAG.getNode(Opc, dl, VT, V1, V2);
}
return SDValue();
@@ -3231,7 +3228,7 @@ bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) {
static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
bool hasSSSE3OrAVX) {
int i, e = VT.getVectorNumElements();
- if (VT.getSizeInBits() != 128 && VT.getSizeInBits() != 64)
+ if (VT.getSizeInBits() != 128)
return false;
// Do not handle v2i64 / v2f64 shuffles with palignr.
@@ -3261,17 +3258,17 @@ static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
return true;
}
-/// isVSHUFPSYMask - Return true if the specified VECTOR_SHUFFLE operand
+/// isVSHUFPYMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to 256-bit
/// VSHUFPSY/VSHUFPDY.
-static bool isVSHUFPSYMask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
+static bool isVSHUFPYMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool HasAVX, bool Commuted = false) {
int NumElems = VT.getVectorNumElements();
- if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
+ if (!HasAVX || VT.getSizeInBits() != 256)
return false;
- if (NumElems != 8)
+ if (NumElems != 4 && NumElems != 8)
return false;
// VSHUFPSY divides the resulting vector into 4 chunks.
@@ -3284,124 +3281,63 @@ static bool isVSHUFPSYMask(const SmallVectorImpl<int> &Mask, EVT VT,
// DST => Y7..Y4, Y7..Y4, X7..X4, X7..X4,
// Y3..Y0, Y3..Y0, X3..X0, X3..X0
//
- int QuarterSize = NumElems/4;
- int HalfSize = QuarterSize*2;
- for (int i = 0; i < QuarterSize; ++i)
- if (!isUndefOrInRange(Mask[i], 0, HalfSize))
- return false;
- for (int i = QuarterSize; i < QuarterSize*2; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize))
- return false;
-
- // The mask of the second half must be the same as the first but with
- // the appropriate offsets. This works in the same way as VPERMILPS
- // works with masks.
- for (int i = QuarterSize*2; i < QuarterSize*3; ++i) {
- if (!isUndefOrInRange(Mask[i], HalfSize, NumElems))
- return false;
- int FstHalfIdx = i-HalfSize;
- if (Mask[FstHalfIdx] < 0)
- continue;
- if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize))
- return false;
- }
- for (int i = QuarterSize*3; i < NumElems; ++i) {
- if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2))
- return false;
- int FstHalfIdx = i-HalfSize;
- if (Mask[FstHalfIdx] < 0)
- continue;
- if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize))
- return false;
-
- }
-
- return true;
-}
-
-/// getShuffleVSHUFPSYImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VSHUFPSY instruction.
-static unsigned getShuffleVSHUFPSYImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- EVT VT = SVOp->getValueType(0);
- int NumElems = VT.getVectorNumElements();
-
- assert(NumElems == 8 && VT.getSizeInBits() == 256 &&
- "Only supports v8i32 and v8f32 types");
-
- int HalfSize = NumElems/2;
- unsigned Mask = 0;
- for (int i = 0; i != NumElems ; ++i) {
- if (SVOp->getMaskElt(i) < 0)
- continue;
- // The mask of the first half must be equal to the second one.
- unsigned Shamt = (i%HalfSize)*2;
- unsigned Elt = SVOp->getMaskElt(i) % HalfSize;
- Mask |= Elt << Shamt;
- }
-
- return Mask;
-}
-
-/// isVSHUFPDYMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to 256-bit
-/// VSHUFPDY. This shuffle doesn't have the same restriction as the PS
-/// version and the mask of the second half isn't binded with the first
-/// one.
-static bool isVSHUFPDYMask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
- int NumElems = VT.getVectorNumElements();
-
- if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
- return false;
-
- if (NumElems != 4)
- return false;
-
- // VSHUFPSY divides the resulting vector into 4 chunks.
+ // VSHUFPDY divides the resulting vector into 4 chunks.
// The sources are also split into 4 chunks, and each destination
// chunk must come from a different source chunk.
//
// SRC1 => X3 X2 X1 X0
// SRC2 => Y3 Y2 Y1 Y0
//
- // DST => Y2..Y3, X2..X3, Y1..Y0, X1..X0
+ // DST => Y3..Y2, X3..X2, Y1..Y0, X1..X0
//
- int QuarterSize = NumElems/4;
- int HalfSize = QuarterSize*2;
- for (int i = 0; i < QuarterSize; ++i)
- if (!isUndefOrInRange(Mask[i], 0, HalfSize))
- return false;
- for (int i = QuarterSize; i < QuarterSize*2; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize))
- return false;
- for (int i = QuarterSize*2; i < QuarterSize*3; ++i)
- if (!isUndefOrInRange(Mask[i], HalfSize, NumElems))
- return false;
- for (int i = QuarterSize*3; i < NumElems; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2))
- return false;
+ unsigned QuarterSize = NumElems/4;
+ unsigned HalfSize = QuarterSize*2;
+ for (unsigned l = 0; l != 2; ++l) {
+ unsigned LaneStart = l*HalfSize;
+ for (unsigned s = 0; s != 2; ++s) {
+ unsigned QuarterStart = s*QuarterSize;
+ unsigned Src = (Commuted) ? (1-s) : s;
+ unsigned SrcStart = Src*NumElems + LaneStart;
+ for (unsigned i = 0; i != QuarterSize; ++i) {
+ int Idx = Mask[i+QuarterStart+LaneStart];
+ if (!isUndefOrInRange(Idx, SrcStart, SrcStart+HalfSize))
+ return false;
+ // For VSHUFPSY, the mask of the second half must be the same as the first
+ // but with the appropriate offsets. This works in the same way as
+ // VPERMILPS works with masks.
+ if (NumElems == 4 || l == 0 || Mask[i+QuarterStart] < 0)
+ continue;
+ if (!isUndefOrEqual(Idx, Mask[i+QuarterStart]+HalfSize))
+ return false;
+ }
+ }
+ }
return true;
}
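// Illustrative check (assumption: LLVM shuffle-mask numbering, where indices
// 0..NumElems-1 select from the first source and NumElems..2*NumElems-1 from
// the second). For v8f32 the high 128-bit lane must repeat the low-lane
// selection shifted by HalfSize, e.g. <0,1,8,9, 4,5,12,13>; for v4f64 the two
// lanes are checked independently, e.g. <0,4, 3,7>.
#include <cassert>
int main() {
  int Mask[8] = {0, 1, 8, 9, 4, 5, 12, 13};
  for (int i = 0; i != 4; ++i)
    assert(Mask[i + 4] == Mask[i] + 4); // second lane = first lane + HalfSize
  return 0;
}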
-/// getShuffleVSHUFPDYImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VSHUFPDY instruction.
-static unsigned getShuffleVSHUFPDYImmediate(SDNode *N) {
+/// getShuffleVSHUFPYImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VSHUFPSY/VSHUFPDY instructions.
+static unsigned getShuffleVSHUFPYImmediate(SDNode *N) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
EVT VT = SVOp->getValueType(0);
int NumElems = VT.getVectorNumElements();
- assert(NumElems == 4 && VT.getSizeInBits() == 256 &&
- "Only supports v4i64 and v4f64 types");
+ assert(VT.getSizeInBits() == 256 && "Only supports 256-bit types");
+ assert((NumElems == 4 || NumElems == 8) && "Only supports v4 and v8 types");
int HalfSize = NumElems/2;
+ unsigned Mul = (NumElems == 8) ? 2 : 1;
unsigned Mask = 0;
- for (int i = 0; i != NumElems ; ++i) {
- if (SVOp->getMaskElt(i) < 0)
+ for (int i = 0; i != NumElems; ++i) {
+ int Elt = SVOp->getMaskElt(i);
+ if (Elt < 0)
continue;
- int Elt = SVOp->getMaskElt(i) % HalfSize;
- Mask |= Elt << i;
+ Elt %= HalfSize;
+ unsigned Shamt = i;
+ // For VSHUFPSY, the mask of the first half must be equal to the second one.
+ if (NumElems == 8) Shamt %= HalfSize;
+ Mask |= Elt << (Shamt*Mul);
}
return Mask;
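// Worked example (sketch) of the immediate computation above for a v4f64
// shuffle mask <0, 5, 2, 7> (HalfSize = 2, Mul = 1): each element contributes
// (Elt % HalfSize) << i, giving 0 | 1<<1 | 0 | 1<<3 = 0b1010 = 0xA.
#include <cassert>
int main() {
  int MaskElts[4] = {0, 5, 2, 7};
  unsigned Imm = 0;
  for (int i = 0; i != 4; ++i)
    Imm |= (MaskElts[i] % 2) << i; // HalfSize == 2, Mul == 1
  assert(Imm == 0xA);
  return 0;
}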
@@ -3409,8 +3345,8 @@ static unsigned getShuffleVSHUFPDYImmediate(SDNode *N) {
/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
/// the two vector operands have swapped position.
-static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) {
- unsigned NumElems = VT.getVectorNumElements();
+static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask,
+ unsigned NumElems) {
for (unsigned i = 0; i != NumElems; ++i) {
int idx = Mask[i];
if (idx < 0)
@@ -3422,31 +3358,13 @@ static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) {
}
}
-/// isCommutedVSHUFP() - Return true if swapping operands will
-/// allow to use the "vshufpd" or "vshufps" instruction
-/// for 256-bit vectors
-static bool isCommutedVSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
-
- unsigned NumElems = VT.getVectorNumElements();
- if ((VT.getSizeInBits() != 256) || ((NumElems != 4) && (NumElems != 8)))
- return false;
-
- SmallVector<int, 8> CommutedMask;
- for (unsigned i = 0; i < NumElems; ++i)
- CommutedMask.push_back(Mask[i]);
-
- CommuteVectorShuffleMask(CommutedMask, VT);
- return (NumElems == 4) ? isVSHUFPDYMask(CommutedMask, VT, Subtarget):
- isVSHUFPSYMask(CommutedMask, VT, Subtarget);
-}
-
-
/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to 128-bit
-/// SHUFPS and SHUFPD.
-static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
- int NumElems = VT.getVectorNumElements();
+/// SHUFPS and SHUFPD. If Commuted is true, then it checks for sources to be
+/// reverse of what x86 shuffles want.
+static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool Commuted = false) {
+ unsigned NumElems = VT.getVectorNumElements();
if (VT.getSizeInBits() != 128)
return false;
@@ -3454,12 +3372,14 @@ static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
if (NumElems != 2 && NumElems != 4)
return false;
- int Half = NumElems / 2;
- for (int i = 0; i < Half; ++i)
- if (!isUndefOrInRange(Mask[i], 0, NumElems))
+ unsigned Half = NumElems / 2;
+ unsigned SrcStart = Commuted ? NumElems : 0;
+ for (unsigned i = 0; i != Half; ++i)
+ if (!isUndefOrInRange(Mask[i], SrcStart, SrcStart+NumElems))
return false;
- for (int i = Half; i < NumElems; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
+ SrcStart = Commuted ? 0 : NumElems;
+ for (unsigned i = Half; i != NumElems; ++i)
+ if (!isUndefOrInRange(Mask[i], SrcStart, SrcStart+NumElems))
return false;
return true;
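// Illustrative check (assumption: LLVM shuffle-mask numbering). With
// Commuted == false the low half of the result must come from the first
// source and the high half from the second, e.g. <2,3,5,7> for v4f32; with
// Commuted == true the expected sources are reversed, e.g. <6,7,1,3>.
#include <cassert>
int main() {
  int Normal[4]   = {2, 3, 5, 7}; // low half from V1 (0..3), high from V2 (4..7)
  int Commuted[4] = {6, 7, 1, 3}; // low half from V2, high from V1
  for (int i = 0; i != 2; ++i) {
    assert(Normal[i] < 4 && Normal[i + 2] >= 4);
    assert(Commuted[i] >= 4 && Commuted[i + 2] < 4);
  }
  return 0;
}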
@@ -3471,32 +3391,6 @@ bool X86::isSHUFPMask(ShuffleVectorSDNode *N) {
return ::isSHUFPMask(M, N->getValueType(0));
}
-/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
-/// the reverse of what x86 shuffles want. x86 shuffles requires the lower
-/// half elements to come from vector 1 (which would equal the dest.) and
-/// the upper half to come from vector 2.
-static bool isCommutedSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
- int NumElems = VT.getVectorNumElements();
-
- if (NumElems != 2 && NumElems != 4)
- return false;
-
- int Half = NumElems / 2;
- for (int i = 0; i < Half; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
- return false;
- for (int i = Half; i < NumElems; ++i)
- if (!isUndefOrInRange(Mask[i], 0, NumElems))
- return false;
- return true;
-}
-
-static bool isCommutedSHUFP(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return isCommutedSHUFPMask(M, N->getValueType(0));
-}
-
/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
@@ -3765,15 +3659,15 @@ bool X86::isMOVLMask(ShuffleVectorSDNode *N) {
return ::isMOVLMask(M, N->getValueType(0));
}
-/// isVPERM2F128Mask - Match 256-bit shuffles where the elements are considered
+/// isVPERM2X128Mask - Match 256-bit shuffles where the elements are considered
/// as permutations between 128-bit chunks or halves. As an example: this
/// shuffle below:
/// vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>
/// The first half comes from the second half of V1 and the second half from
/// the second half of V2.
-static bool isVPERM2F128Mask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
- if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
+static bool isVPERM2X128Mask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool HasAVX) {
+ if (!HasAVX || VT.getSizeInBits() != 256)
return false;
// The shuffle result is divided into half A and half B. In total the two
@@ -3801,10 +3695,9 @@ static bool isVPERM2F128Mask(const SmallVectorImpl<int> &Mask, EVT VT,
return MatchA && MatchB;
}
-/// getShuffleVPERM2F128Immediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VPERM2F128 instructions.
-static unsigned getShuffleVPERM2F128Immediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+/// getShuffleVPERM2X128Immediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VPERM2F128/VPERM2I128 instructions.
+static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
EVT VT = SVOp->getValueType(0);
int HalfSize = VT.getVectorNumElements()/2;
@@ -3826,81 +3719,47 @@ static unsigned getShuffleVPERM2F128Immediate(SDNode *N) {
return (FstHalf | (SndHalf << 4));
}
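// Worked example (sketch) for the doc comment above: for the v8i32 shuffle
// <4,5,6,7, 12,13,14,15> each destination half is encoded as
// source-element-index / HalfSize, so the low half (elements 4..7, the high
// half of V1) gives 1 and the high half (elements 12..15, the high half of V2)
// gives 3, yielding an immediate of 1 | (3 << 4) == 0x31.
#include <cassert>
int main() {
  int Mask[8] = {4, 5, 6, 7, 12, 13, 14, 15};
  const int HalfSize = 4;
  unsigned FstHalf = Mask[0] / HalfSize;        // 1: high half of V1
  unsigned SndHalf = Mask[HalfSize] / HalfSize; // 3: high half of V2
  assert((FstHalf | (SndHalf << 4)) == 0x31);
  return 0;
}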
-/// isVPERMILPDMask - Return true if the specified VECTOR_SHUFFLE operand
+/// isVPERMILPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to VPERMILPS/D*.
/// Note that VPERMIL mask matching is different depending on whether the
/// underlying type is 32- or 64-bit. In VPERMILPS the high half of the mask
/// should point to the same elements as the low half, but to the higher half
/// of the source. In VPERMILPD the two lanes can be shuffled independently of
/// each other, with the same restriction that lanes can't be crossed.
-static bool isVPERMILPDMask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
+static bool isVPERMILPMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool HasAVX) {
int NumElts = VT.getVectorNumElements();
int NumLanes = VT.getSizeInBits()/128;
- if (!Subtarget->hasAVX())
+ if (!HasAVX)
return false;
- // Only match 256-bit with 64-bit types
- if (VT.getSizeInBits() != 256 || NumElts != 4)
+ // Only match 256-bit with 32/64-bit types
+ if (VT.getSizeInBits() != 256 || (NumElts != 4 && NumElts != 8))
return false;
- // The mask on the high lane is independent of the low. Both can match
- // any element in inside its own lane, but can't cross.
int LaneSize = NumElts/NumLanes;
- for (int l = 0; l < NumLanes; ++l)
- for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) {
- int LaneStart = l*LaneSize;
- if (!isUndefOrInRange(Mask[i], LaneStart, LaneStart+LaneSize))
+ for (int l = 0; l != NumLanes; ++l) {
+ int LaneStart = l*LaneSize;
+ for (int i = 0; i != LaneSize; ++i) {
+ if (!isUndefOrInRange(Mask[i+LaneStart], LaneStart, LaneStart+LaneSize))
+ return false;
+ if (NumElts == 4 || l == 0)
+ continue;
+ // VPERMILPS handling
+ if (Mask[i] < 0)
+ continue;
+ if (!isUndefOrEqual(Mask[i+LaneStart], Mask[i]+LaneSize))
return false;
}
-
- return true;
-}
-
-/// isVPERMILPSMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to VPERMILPS*.
-/// Note that VPERMIL mask matching is different depending whether theunderlying
-/// type is 32 or 64. In the VPERMILPS the high half of the mask should point
-/// to the same elements of the low, but to the higher half of the source.
-/// In VPERMILPD the two lanes could be shuffled independently of each other
-/// with the same restriction that lanes can't be crossed.
-static bool isVPERMILPSMask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
- unsigned NumElts = VT.getVectorNumElements();
- unsigned NumLanes = VT.getSizeInBits()/128;
-
- if (!Subtarget->hasAVX())
- return false;
-
- // Only match 256-bit with 32-bit types
- if (VT.getSizeInBits() != 256 || NumElts != 8)
- return false;
-
- // The mask on the high lane should be the same as the low. Actually,
- // they can differ if any of the corresponding index in a lane is undef
- // and the other stays in range.
- int LaneSize = NumElts/NumLanes;
- for (int i = 0; i < LaneSize; ++i) {
- int HighElt = i+LaneSize;
- bool HighValid = isUndefOrInRange(Mask[HighElt], LaneSize, NumElts);
- bool LowValid = isUndefOrInRange(Mask[i], 0, LaneSize);
-
- if (!HighValid || !LowValid)
- return false;
- if (Mask[i] < 0 || Mask[HighElt] < 0)
- continue;
- if (Mask[HighElt]-Mask[i] != LaneSize)
- return false;
}
return true;
}
-/// getShuffleVPERMILPSImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VPERMILPS* instructions.
-static unsigned getShuffleVPERMILPSImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+/// getShuffleVPERMILPImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VPERMILPS/D* instructions.
+static unsigned getShuffleVPERMILPImmediate(ShuffleVectorSDNode *SVOp) {
EVT VT = SVOp->getValueType(0);
int NumElts = VT.getVectorNumElements();
@@ -3911,43 +3770,22 @@ static unsigned getShuffleVPERMILPSImmediate(SDNode *N) {
// where a mask will match because the same mask element is undef on the
// first half but valid on the second. This would get pathological cases
// such as: shuffle <u, 0, 1, 2, 4, 4, 5, 6>, which is completely valid.
+ unsigned Shift = (LaneSize == 4) ? 2 : 1;
unsigned Mask = 0;
- for (int l = 0; l < NumLanes; ++l) {
- for (int i = 0; i < LaneSize; ++i) {
- int MaskElt = SVOp->getMaskElt(i+(l*LaneSize));
- if (MaskElt < 0)
- continue;
- if (MaskElt >= LaneSize)
- MaskElt -= LaneSize;
- Mask |= MaskElt << (i*2);
- }
+ for (int i = 0; i != NumElts; ++i) {
+ int MaskElt = SVOp->getMaskElt(i);
+ if (MaskElt < 0)
+ continue;
+ MaskElt %= LaneSize;
+ unsigned Shamt = i;
+ // For VPERMILPSY, the mask of the first half must be equal to the second one
+ if (NumElts == 8) Shamt %= LaneSize;
+ Mask |= MaskElt << (Shamt*Shift);
}
return Mask;
}
-/// getShuffleVPERMILPDImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VPERMILPD* instructions.
-static unsigned getShuffleVPERMILPDImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- EVT VT = SVOp->getValueType(0);
-
- int NumElts = VT.getVectorNumElements();
- int NumLanes = VT.getSizeInBits()/128;
-
- unsigned Mask = 0;
- int LaneSize = NumElts/NumLanes;
- for (int l = 0; l < NumLanes; ++l)
- for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) {
- int MaskElt = SVOp->getMaskElt(i);
- if (MaskElt < 0)
- continue;
- Mask |= (MaskElt-l*LaneSize) << i;
- }
-
- return Mask;
-}
-
/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
/// of what x86 movss wants: the lowest element must be the lowest element of
/// vector 2, and the other elements must come from vector 1 in order.
@@ -4035,21 +3873,18 @@ bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N,
/// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to 256-bit
/// version of MOVDDUP.
-static bool isMOVDDUPYMask(ShuffleVectorSDNode *N,
- const X86Subtarget *Subtarget) {
- EVT VT = N->getValueType(0);
+static bool isMOVDDUPYMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool HasAVX) {
int NumElts = VT.getVectorNumElements();
- bool V2IsUndef = N->getOperand(1).getOpcode() == ISD::UNDEF;
- if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256 ||
- !V2IsUndef || NumElts != 4)
+ if (!HasAVX || VT.getSizeInBits() != 256 || NumElts != 4)
return false;
for (int i = 0; i != NumElts/2; ++i)
- if (!isUndefOrEqual(N->getMaskElt(i), 0))
+ if (!isUndefOrEqual(Mask[i], 0))
return false;
for (int i = NumElts/2; i != NumElts; ++i)
- if (!isUndefOrEqual(N->getMaskElt(i), NumElts/2))
+ if (!isUndefOrEqual(Mask[i], NumElts/2))
return false;
return true;
}
@@ -4164,14 +3999,13 @@ unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
/// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
-unsigned X86::getShufflePALIGNRImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- EVT VVT = N->getValueType(0);
- unsigned EltSize = VVT.getVectorElementType().getSizeInBits() >> 3;
+static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
+ EVT VT = SVOp->getValueType(0);
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits() >> 3;
int Val = 0;
unsigned i, e;
- for (i = 0, e = VVT.getVectorNumElements(); i != e; ++i) {
+ for (i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
Val = SVOp->getMaskElt(i);
if (Val >= 0)
break;
@@ -4631,29 +4465,14 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
case X86ISD::SHUFPS:
case X86ISD::SHUFPD:
ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeSHUFPSMask(NumElems,
- cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::PUNPCKHBW:
- case X86ISD::PUNPCKHWD:
- case X86ISD::PUNPCKHDQ:
- case X86ISD::PUNPCKHQDQ:
- DecodePUNPCKHMask(NumElems, ShuffleMask);
- break;
- case X86ISD::UNPCKHPS:
- case X86ISD::UNPCKHPD:
- DecodeUNPCKHPMask(VT, ShuffleMask);
+ DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
break;
- case X86ISD::PUNPCKLBW:
- case X86ISD::PUNPCKLWD:
- case X86ISD::PUNPCKLDQ:
- case X86ISD::PUNPCKLQDQ:
- DecodePUNPCKLMask(VT, ShuffleMask);
+ case X86ISD::UNPCKH:
+ DecodeUNPCKHMask(VT, ShuffleMask);
break;
- case X86ISD::UNPCKLPS:
- case X86ISD::UNPCKLPD:
- DecodeUNPCKLPMask(VT, ShuffleMask);
+ case X86ISD::UNPCKL:
+ DecodeUNPCKLMask(VT, ShuffleMask);
break;
case X86ISD::MOVHLPS:
DecodeMOVHLPSMask(NumElems, ShuffleMask);
@@ -4686,27 +4505,12 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG,
Depth+1);
}
- case X86ISD::VPERMILPS:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeVPERMILPSMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::VPERMILPSY:
+ case X86ISD::VPERMILP:
ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeVPERMILPSMask(8, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ DecodeVPERMILPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
ShuffleMask);
break;
- case X86ISD::VPERMILPD:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeVPERMILPDMask(2, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::VPERMILPDY:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeVPERMILPDMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::VPERM2F128:
+ case X86ISD::VPERM2X128:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeVPERM2F128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
ShuffleMask);
@@ -5334,8 +5138,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DAG);
} else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
- assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
- EVT MiddleVT = MVT::v4i32;
+ unsigned NumBits = VT.getSizeInBits();
+ assert((NumBits == 128 || NumBits == 256) &&
+ "Expected an SSE or AVX value type!");
+ EVT MiddleVT = NumBits == 128 ? MVT::v4i32 : MVT::v8i32;
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true,
Subtarget->hasXMMInt(), DAG);
@@ -6256,7 +6062,7 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
// from X.
if (NumHi == 3) {
// Normalize it so the 3 elements come from V1.
- CommuteVectorShuffleMask(PermMask, VT);
+ CommuteVectorShuffleMask(PermMask, 4);
std::swap(V1, V2);
}
@@ -6566,70 +6372,6 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) {
X86::getShuffleSHUFImmediate(SVOp), DAG);
}
-static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) {
- switch(VT.getSimpleVT().SimpleTy) {
- case MVT::v4i32: return X86ISD::PUNPCKLDQ;
- case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
- case MVT::v8i32:
- if (HasAVX2) return X86ISD::PUNPCKLDQ;
- // else use fp unit for int unpack.
- case MVT::v8f32:
- case MVT::v4f32: return X86ISD::UNPCKLPS;
- case MVT::v4i64:
- if (HasAVX2) return X86ISD::PUNPCKLQDQ;
- // else use fp unit for int unpack.
- case MVT::v4f64:
- case MVT::v2f64: return X86ISD::UNPCKLPD;
- case MVT::v32i8:
- case MVT::v16i8: return X86ISD::PUNPCKLBW;
- case MVT::v16i16:
- case MVT::v8i16: return X86ISD::PUNPCKLWD;
- default:
- llvm_unreachable("Unknown type for unpckl");
- }
- return 0;
-}
-
-static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) {
- switch(VT.getSimpleVT().SimpleTy) {
- case MVT::v4i32: return X86ISD::PUNPCKHDQ;
- case MVT::v2i64: return X86ISD::PUNPCKHQDQ;
- case MVT::v8i32:
- if (HasAVX2) return X86ISD::PUNPCKHDQ;
- // else use fp unit for int unpack.
- case MVT::v8f32:
- case MVT::v4f32: return X86ISD::UNPCKHPS;
- case MVT::v4i64:
- if (HasAVX2) return X86ISD::PUNPCKHQDQ;
- // else use fp unit for int unpack.
- case MVT::v4f64:
- case MVT::v2f64: return X86ISD::UNPCKHPD;
- case MVT::v32i8:
- case MVT::v16i8: return X86ISD::PUNPCKHBW;
- case MVT::v16i16:
- case MVT::v8i16: return X86ISD::PUNPCKHWD;
- default:
- llvm_unreachable("Unknown type for unpckh");
- }
- return 0;
-}
-
-static inline unsigned getVPERMILOpcode(EVT VT) {
- switch(VT.getSimpleVT().SimpleTy) {
- case MVT::v4i32:
- case MVT::v4f32: return X86ISD::VPERMILPS;
- case MVT::v2i64:
- case MVT::v2f64: return X86ISD::VPERMILPD;
- case MVT::v8i32:
- case MVT::v8f32: return X86ISD::VPERMILPSY;
- case MVT::v4i64:
- case MVT::v4f64: return X86ISD::VPERMILPDY;
- default:
- llvm_unreachable("Unknown type for vpermil");
- }
- return 0;
-}
-
static
SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI,
@@ -6703,17 +6445,19 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
unsigned NumElems = VT.getVectorNumElements();
- bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
bool V1IsSplat = false;
bool V2IsSplat = false;
bool HasXMMInt = Subtarget->hasXMMInt();
+ bool HasAVX = Subtarget->hasAVX();
bool HasAVX2 = Subtarget->hasAVX2();
MachineFunction &MF = DAG.getMachineFunction();
bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");
+ assert(V1.getOpcode() != ISD::UNDEF && "Op 1 of shuffle should not be undef");
+
// Vector shuffle lowering takes 3 steps:
//
// 1) Normalize the input vectors. Here splats, zeroed vectors, profitable
@@ -6738,11 +6482,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
// unpckh_undef). Only use pshufd if speed is more important than size.
if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1,
- DAG);
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
- return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1,
- DAG);
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
if (X86::isMOVDDUPMask(SVOp) && Subtarget->hasSSE3orAVX() &&
V2IsUndef && RelaxedMayFoldVectorLoad(V1))
@@ -6754,8 +6496,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// Use to match splats
if (HasXMMInt && X86::isUNPCKHMask(SVOp, HasAVX2) && V2IsUndef &&
(VT == MVT::v2f64 || VT == MVT::v2i64))
- return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1,
- DAG);
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
if (X86::isPSHUFDMask(SVOp)) {
// The actual implementation will match the mask in the if above and then
@@ -6787,8 +6528,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
if (X86::isMOVLMask(SVOp)) {
- if (V1IsUndef)
- return V2;
if (ISD::isBuildVectorAllZeros(V1.getNode()))
return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl);
if (!X86::isMOVLPMask(SVOp)) {
@@ -6834,17 +6573,19 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
V2IsSplat = isSplatVector(V2.getNode());
// Canonicalize the splat or undef, if present, to be on the RHS.
- if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
+ if (V1IsSplat && !V2IsSplat) {
Op = CommuteVectorShuffle(SVOp, DAG);
SVOp = cast<ShuffleVectorSDNode>(Op);
V1 = SVOp->getOperand(0);
V2 = SVOp->getOperand(1);
std::swap(V1IsSplat, V2IsSplat);
- std::swap(V1IsUndef, V2IsUndef);
Commuted = true;
}
- if (isCommutedMOVL(SVOp, V2IsSplat, V2IsUndef)) {
+ SmallVector<int, 32> M;
+ SVOp->getMask(M);
+
+ if (isCommutedMOVLMask(M, VT, V2IsSplat, V2IsUndef)) {
// Shuffling low element of v1 into undef, just return v1.
if (V2IsUndef)
return V1;
@@ -6854,13 +6595,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getMOVL(DAG, dl, VT, V2, V1);
}
- if (X86::isUNPCKLMask(SVOp, HasAVX2))
- return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V2,
- DAG);
+ if (isUNPCKLMask(M, VT, HasAVX2))
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
- if (X86::isUNPCKHMask(SVOp, HasAVX2))
- return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V2,
- DAG);
+ if (isUNPCKHMask(M, VT, HasAVX2))
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
if (V2IsSplat) {
// Normalize mask so all entries that point to V2 points to its first
@@ -6884,35 +6623,30 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
if (X86::isUNPCKLMask(NewSVOp, HasAVX2))
- return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V2, V1,
- DAG);
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V2, V1, DAG);
if (X86::isUNPCKHMask(NewSVOp, HasAVX2))
- return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V2, V1,
- DAG);
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V2, V1, DAG);
}
// Normalize the node to match x86 shuffle ops if needed
- if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp))
+ if (!V2IsUndef && (isSHUFPMask(M, VT, /* Commuted */ true) ||
+ isVSHUFPYMask(M, VT, HasAVX, /* Commuted */ true)))
return CommuteVectorShuffle(SVOp, DAG);
// The checks below are all present in isShuffleMaskLegal, but they are
// inlined here right now to enable us to directly emit target specific
// nodes, and remove one by one until they don't return Op anymore.
- SmallVector<int, 16> M;
- SVOp->getMask(M);
if (isPALIGNRMask(M, VT, Subtarget->hasSSSE3orAVX()))
return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2,
- X86::getShufflePALIGNRImmediate(SVOp),
+ getShufflePALIGNRImmediate(SVOp),
DAG);
if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
SVOp->getSplatIndex() == 0 && V2IsUndef) {
- if (VT == MVT::v2f64)
- return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG);
- if (VT == MVT::v2i64)
- return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG);
+ if (VT == MVT::v2f64 || VT == MVT::v2i64)
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
}
if (isPSHUFHWMask(M, VT))
@@ -6929,12 +6663,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
X86::getShuffleSHUFImmediate(SVOp), DAG);
- if (X86::isUNPCKL_v_undef_Mask(SVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1,
- DAG);
- if (X86::isUNPCKH_v_undef_Mask(SVOp))
- return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1,
- DAG);
+ if (isUNPCKL_v_undef_Mask(M, VT))
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
+ if (isUNPCKH_v_undef_Mask(M, VT))
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
//===--------------------------------------------------------------------===//
// Generate target specific nodes for 128 or 256-bit shuffles only
@@ -6942,44 +6674,23 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
//
// Handle VMOVDDUPY permutations
- if (isMOVDDUPYMask(SVOp, Subtarget))
+ if (V2IsUndef && isMOVDDUPYMask(M, VT, HasAVX))
return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
- // Handle VPERMILPS* permutations
- if (isVPERMILPSMask(M, VT, Subtarget))
- return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1,
- getShuffleVPERMILPSImmediate(SVOp), DAG);
-
- // Handle VPERMILPD* permutations
- if (isVPERMILPDMask(M, VT, Subtarget))
- return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1,
- getShuffleVPERMILPDImmediate(SVOp), DAG);
+ // Handle VPERMILPS/D* permutations
+ if (isVPERMILPMask(M, VT, HasAVX))
+ return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1,
+ getShuffleVPERMILPImmediate(SVOp), DAG);
- // Handle VPERM2F128 permutations
- if (isVPERM2F128Mask(M, VT, Subtarget))
- return getTargetShuffleNode(X86ISD::VPERM2F128, dl, VT, V1, V2,
- getShuffleVPERM2F128Immediate(SVOp), DAG);
+ // Handle VPERM2F128/VPERM2I128 permutations
+ if (isVPERM2X128Mask(M, VT, HasAVX))
+ return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
+ V2, getShuffleVPERM2X128Immediate(SVOp), DAG);
- // Handle VSHUFPSY permutations
- if (isVSHUFPSYMask(M, VT, Subtarget))
+ // Handle VSHUFPS/DY permutations
+ if (isVSHUFPYMask(M, VT, HasAVX))
return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
- getShuffleVSHUFPSYImmediate(SVOp), DAG);
-
- // Handle VSHUFPDY permutations
- if (isVSHUFPDYMask(M, VT, Subtarget))
- return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
- getShuffleVSHUFPDYImmediate(SVOp), DAG);
-
- // Try to swap operands in the node to match x86 shuffle ops
- if (isCommutedVSHUFPMask(M, VT, Subtarget)) {
- // Now we need to commute operands.
- SVOp = cast<ShuffleVectorSDNode>(CommuteVectorShuffle(SVOp, DAG));
- V1 = SVOp->getOperand(0);
- V2 = SVOp->getOperand(1);
- unsigned Immediate = (NumElems == 4) ? getShuffleVSHUFPDYImmediate(SVOp):
- getShuffleVSHUFPSYImmediate(SVOp);
- return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2, Immediate, DAG);
- }
+ getShuffleVSHUFPYImmediate(SVOp), DAG);
//===--------------------------------------------------------------------===//
// Since no target specific shuffle was selected for this generic one,
@@ -7888,7 +7599,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
LLVMContext *Context = DAG.getContext();
// Build some magic constants.
- std::vector<Constant*> CV0;
+ SmallVector<Constant*,4> CV0;
CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x45300000)));
CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x43300000)));
CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
@@ -7896,7 +7607,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
Constant *C0 = ConstantVector::get(CV0);
SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16);
- std::vector<Constant*> CV1;
+ SmallVector<Constant*,2> CV1;
CV1.push_back(
ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL))));
CV1.push_back(
@@ -8176,17 +7887,13 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op,
EVT EltVT = VT;
if (VT.isVector())
EltVT = VT.getVectorElementType();
- std::vector<Constant*> CV;
+ SmallVector<Constant*,4> CV;
if (EltVT == MVT::f64) {
Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))));
- CV.push_back(C);
- CV.push_back(C);
+ CV.assign(2, C);
} else {
Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))));
- CV.push_back(C);
- CV.push_back(C);
- CV.push_back(C);
- CV.push_back(C);
+ CV.assign(4, C);
}
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
@@ -8201,19 +7908,18 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
EVT EltVT = VT;
- if (VT.isVector())
+ unsigned NumElts = VT == MVT::f64 ? 2 : 4;
+ if (VT.isVector()) {
EltVT = VT.getVectorElementType();
- std::vector<Constant*> CV;
+ NumElts = VT.getVectorNumElements();
+ }
+ SmallVector<Constant*,8> CV;
if (EltVT == MVT::f64) {
Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63)));
- CV.push_back(C);
- CV.push_back(C);
+ CV.assign(NumElts, C);
} else {
Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31)));
- CV.push_back(C);
- CV.push_back(C);
- CV.push_back(C);
- CV.push_back(C);
+ CV.assign(NumElts, C);
}
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
@@ -8221,11 +7927,12 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo::getConstantPool(),
false, false, false, 16);
if (VT.isVector()) {
+ MVT XORVT = VT.getSizeInBits() == 128 ? MVT::v2i64 : MVT::v4i64;
return DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getNode(ISD::XOR, dl, MVT::v2i64,
- DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
+ DAG.getNode(ISD::XOR, dl, XORVT,
+ DAG.getNode(ISD::BITCAST, dl, XORVT,
Op.getOperand(0)),
- DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Mask)));
+ DAG.getNode(ISD::BITCAST, dl, XORVT, Mask)));
} else {
return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask);
}
@@ -8254,7 +7961,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
// type, and that won't be f80 since that is not custom lowered.
// First get the sign bit of second operand.
- std::vector<Constant*> CV;
+ SmallVector<Constant*,4> CV;
if (SrcVT == MVT::f64) {
CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))));
CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
@@ -9253,7 +8960,7 @@ SDValue
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows() ||
- EnableSegmentedStacks) &&
+ getTargetMachine().Options.EnableSegmentedStacks) &&
"This should be used only on Windows targets or when segmented stacks "
"are being used");
assert(!Subtarget->isTargetEnvMacho() && "Not implemented");
@@ -9267,7 +8974,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
bool Is64Bit = Subtarget->is64Bit();
EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32;
- if (EnableSegmentedStacks) {
+ if (getTargetMachine().Options.EnableSegmentedStacks) {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -9403,7 +9110,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
if (ArgMode == 2) {
// Sanity Check: Make sure using fp_offset makes sense.
- assert(!UseSoftFloat &&
+ assert(!getTargetMachine().Options.UseSoftFloat &&
!(DAG.getMachineFunction()
.getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) &&
Subtarget->hasXMM());
@@ -10472,7 +10179,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
DAG.getConstant(4, MVT::i32));
- R = DAG.getNode(ISD::VSELECT, dl, VT, Op, R, M);
+ R = DAG.getNode(ISD::VSELECT, dl, VT, Op, M, R);
// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
@@ -10487,13 +10194,13 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
DAG.getConstant(2, MVT::i32));
- R = DAG.getNode(ISD::VSELECT, dl, VT, Op, R, M);
+ R = DAG.getNode(ISD::VSELECT, dl, VT, Op, M, R);
// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
// return pblendv(r, r+r, a);
R = DAG.getNode(ISD::VSELECT, dl, VT, Op,
- R, DAG.getNode(ISD::ADD, dl, VT, R, R));
+ DAG.getNode(ISD::ADD, dl, VT, R, R), R);
return R;
}
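// Sketch of the operand order assumed by the swapped VSELECT nodes above:
// vselect(Cond, TrueVal, FalseVal) yields TrueVal in the lanes where Cond is
// true, so the shifted value (M, or R+R) now has to be the second operand and
// the unshifted value the third.
#include <cassert>
static int vselect(bool Cond, int TrueVal, int FalseVal) {
  return Cond ? TrueVal : FalseVal;
}
int main() {
  assert(vselect(true, /*shifted*/ 8, /*unshifted*/ 1) == 8);
  assert(vselect(false, /*shifted*/ 8, /*unshifted*/ 1) == 1);
  return 0;
}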
@@ -11194,6 +10901,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::ANDNP: return "X86ISD::ANDNP";
case X86ISD::PSIGN: return "X86ISD::PSIGN";
case X86ISD::BLENDV: return "X86ISD::BLENDV";
+ case X86ISD::HADD: return "X86ISD::HADD";
+ case X86ISD::HSUB: return "X86ISD::HSUB";
case X86ISD::FHADD: return "X86ISD::FHADD";
case X86ISD::FHSUB: return "X86ISD::FHSUB";
case X86ISD::FMAX: return "X86ISD::FMAX";
@@ -11266,24 +10975,11 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::MOVSLDUP_LD: return "X86ISD::MOVSLDUP_LD";
case X86ISD::MOVSD: return "X86ISD::MOVSD";
case X86ISD::MOVSS: return "X86ISD::MOVSS";
- case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS";
- case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD";
- case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS";
- case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD";
- case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW";
- case X86ISD::PUNPCKLWD: return "X86ISD::PUNPCKLWD";
- case X86ISD::PUNPCKLDQ: return "X86ISD::PUNPCKLDQ";
- case X86ISD::PUNPCKLQDQ: return "X86ISD::PUNPCKLQDQ";
- case X86ISD::PUNPCKHBW: return "X86ISD::PUNPCKHBW";
- case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD";
- case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ";
- case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ";
+ case X86ISD::UNPCKL: return "X86ISD::UNPCKL";
+ case X86ISD::UNPCKH: return "X86ISD::UNPCKH";
case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
- case X86ISD::VPERMILPS: return "X86ISD::VPERMILPS";
- case X86ISD::VPERMILPSY: return "X86ISD::VPERMILPSY";
- case X86ISD::VPERMILPD: return "X86ISD::VPERMILPD";
- case X86ISD::VPERMILPDY: return "X86ISD::VPERMILPDY";
- case X86ISD::VPERM2F128: return "X86ISD::VPERM2F128";
+ case X86ISD::VPERMILP: return "X86ISD::VPERMILP";
+ case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
@@ -11391,7 +11087,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
EVT VT) const {
// Very little shuffling can be done for 64-bit vectors right now.
if (VT.getSizeInBits() == 64)
- return isPALIGNRMask(M, VT, Subtarget->hasSSSE3orAVX());
+ return false;
// FIXME: pshufb, blends, shifts.
return (VT.getVectorNumElements() == 2 ||
@@ -11419,7 +11115,7 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
return (isMOVLMask(Mask, VT) ||
isCommutedMOVLMask(Mask, VT, true) ||
isSHUFPMask(Mask, VT) ||
- isCommutedSHUFPMask(Mask, VT));
+ isSHUFPMask(Mask, VT, /* Commuted */ true));
}
return false;
}
@@ -12289,7 +11985,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
MachineFunction *MF = BB->getParent();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- assert(EnableSegmentedStacks);
+ assert(getTargetMachine().Options.EnableSegmentedStacks);
unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
unsigned TlsOffset = Is64Bit ? 0x70 : 0x30;
@@ -13169,7 +12865,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
std::swap(LHS, RHS);
@@ -13179,7 +12875,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
case ISD::SETOLE:
// Converting this to a min would handle comparisons between positive
// and negative zero incorrectly.
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
break;
Opcode = X86ISD::FMIN;
@@ -13197,7 +12893,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
case ISD::SETOGE:
// Converting this to a max would handle comparisons between positive
// and negative zero incorrectly.
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
break;
Opcode = X86ISD::FMAX;
@@ -13207,7 +12903,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
std::swap(LHS, RHS);
@@ -13233,7 +12929,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Converting this to a min would handle comparisons between positive
// and negative zero incorrectly, and swapping the operands would
// cause it to handle NaNs incorrectly.
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) {
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
@@ -13243,7 +12939,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
break;
case ISD::SETUGT:
// Converting this to a min would handle NaNs incorrectly.
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
(!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
break;
Opcode = X86ISD::FMIN;
@@ -13268,7 +12964,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Converting this to a max would handle comparisons between positive
// and negative zero incorrectly, and swapping the operands would
// cause it to handle NaNs incorrectly.
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
@@ -14048,7 +13744,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
X = DAG.getNode(ISD::BITCAST, DL, BlendVT, X);
Y = DAG.getNode(ISD::BITCAST, DL, BlendVT, Y);
Mask = DAG.getNode(ISD::BITCAST, DL, BlendVT, Mask);
- Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, X, Y);
+ Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, Y, X);
return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
}
}
@@ -14232,7 +13928,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
SDValue StoredVal = St->getOperand(1);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- // If we are saving a concatination of two XMM registers, perform two stores.
+ // If we are saving a concatenation of two XMM registers, perform two stores.
// This is better in Sandy Bridge cause one 256-bit mem op is done via two
// 128-bit ones. If in the future the cost becomes only one memory access the
// first version would be better.
@@ -14342,7 +14038,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
const Function *F = DAG.getMachineFunction().getFunction();
bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
- bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps
+ bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps
&& Subtarget->hasXMMInt();
if ((VT.isVector() ||
(VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
@@ -14458,7 +14154,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
/// set to A, RHS to B, and the routine returns 'true'.
/// Note that the binary operation should have the property that if one of the
/// operands is UNDEF then the result is UNDEF.
-static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
+static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
// Look for the following pattern: if
// A = < float a0, float a1, float a2, float a3 >
// B = < float b0, float b1, float b2, float b3 >
@@ -14474,7 +14170,18 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
return false;
EVT VT = LHS.getValueType();
- unsigned N = VT.getVectorNumElements();
+
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Unsupported vector type for horizontal add/sub");
+
+ // Handle 128 and 256-bit vector lengths. AVX defines horizontal add/sub to
+ // operate independently on 128-bit lanes.
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+ assert((NumLaneElts % 2 == 0) &&
+ "Vector type should have an even number of elements in each lane");
+ unsigned HalfLaneElts = NumLaneElts/2;
// View LHS in the form
// LHS = VECTOR_SHUFFLE A, B, LMask
@@ -14483,7 +14190,7 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
// NOTE: in what follows a default initialized SDValue represents an UNDEF of
// type VT.
SDValue A, B;
- SmallVector<int, 8> LMask(N);
+ SmallVector<int, 16> LMask(NumElts);
if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
if (LHS.getOperand(0).getOpcode() != ISD::UNDEF)
A = LHS.getOperand(0);
@@ -14493,14 +14200,14 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
} else {
if (LHS.getOpcode() != ISD::UNDEF)
A = LHS;
- for (unsigned i = 0; i != N; ++i)
+ for (unsigned i = 0; i != NumElts; ++i)
LMask[i] = i;
}
// Likewise, view RHS in the form
// RHS = VECTOR_SHUFFLE C, D, RMask
SDValue C, D;
- SmallVector<int, 8> RMask(N);
+ SmallVector<int, 16> RMask(NumElts);
if (RHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
if (RHS.getOperand(0).getOpcode() != ISD::UNDEF)
C = RHS.getOperand(0);
@@ -14510,7 +14217,7 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
} else {
if (RHS.getOpcode() != ISD::UNDEF)
C = RHS;
- for (unsigned i = 0; i != N; ++i)
+ for (unsigned i = 0; i != NumElts; ++i)
RMask[i] = i;
}
@@ -14525,30 +14232,28 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
// If A and B occur in reverse order in RHS, then "swap" them (which means
// rewriting the mask).
if (A != C)
- for (unsigned i = 0; i != N; ++i) {
- unsigned Idx = RMask[i];
- if (Idx < N)
- RMask[i] += N;
- else if (Idx < 2*N)
- RMask[i] -= N;
- }
+ CommuteVectorShuffleMask(RMask, NumElts);
// At this point LHS and RHS are equivalent to
// LHS = VECTOR_SHUFFLE A, B, LMask
// RHS = VECTOR_SHUFFLE A, B, RMask
// Check that the masks correspond to performing a horizontal operation.
- for (unsigned i = 0; i != N; ++i) {
- unsigned LIdx = LMask[i], RIdx = RMask[i];
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int LIdx = LMask[i], RIdx = RMask[i];
// Ignore any UNDEF components.
- if (LIdx >= 2*N || RIdx >= 2*N || (!A.getNode() && (LIdx < N || RIdx < N))
- || (!B.getNode() && (LIdx >= N || RIdx >= N)))
+ if (LIdx < 0 || RIdx < 0 ||
+ (!A.getNode() && (LIdx < (int)NumElts || RIdx < (int)NumElts)) ||
+ (!B.getNode() && (LIdx >= (int)NumElts || RIdx >= (int)NumElts)))
continue;
// Check that successive elements are being operated on. If not, this is
// not a horizontal operation.
- if (!(LIdx == 2*i && RIdx == 2*i + 1) &&
- !(isCommutative && LIdx == 2*i + 1 && RIdx == 2*i))
+ unsigned Src = (i/HalfLaneElts) % 2; // each lane is split between srcs
+ unsigned LaneStart = (i/NumLaneElts) * NumLaneElts;
+ int Index = 2*(i%HalfLaneElts) + NumElts*Src + LaneStart;
+ if (!(LIdx == Index && RIdx == Index + 1) &&
+ !(IsCommutative && LIdx == Index + 1 && RIdx == Index))
return false;
}
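// Worked example (sketch) of the per-lane index pattern the loop above
// accepts, for v8f32 (NumElts = 8, NumLaneElts = 4, HalfLaneElts = 2): a
// VHADDPS of A and B produces
//   < a0+a1, a2+a3, b0+b1, b2+b3, a4+a5, a6+a7, b4+b5, b6+b7 >
// so the expected masks are LMask = <0,2,8,10,4,6,12,14> and
// RMask = <1,3,9,11,5,7,13,15>.
#include <cassert>
int main() {
  int LMask[8] = {0, 2, 8, 10, 4, 6, 12, 14};
  int RMask[8] = {1, 3, 9, 11, 5, 7, 13, 15};
  const unsigned NumElts = 8, NumLaneElts = 4, HalfLaneElts = 2;
  for (unsigned i = 0; i != NumElts; ++i) {
    unsigned Src = (i / HalfLaneElts) % 2;        // each lane split between srcs
    unsigned LaneStart = (i / NumLaneElts) * NumLaneElts;
    int Index = 2 * (i % HalfLaneElts) + NumElts * Src + LaneStart;
    assert(LMask[i] == Index && RMask[i] == Index + 1);
  }
  return 0;
}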
@@ -14565,7 +14270,8 @@ static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
SDValue RHS = N->getOperand(1);
// Try to synthesize horizontal adds from adds of shuffles.
- if (Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64) &&
+ if (((Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
+ (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
isHorizontalBinOp(LHS, RHS, true))
return DAG.getNode(X86ISD::FHADD, N->getDebugLoc(), VT, LHS, RHS);
return SDValue();
@@ -14579,7 +14285,8 @@ static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG,
SDValue RHS = N->getOperand(1);
// Try to synthesize horizontal subs from subs of shuffles.
- if (Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64) &&
+ if (((Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
+ (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
isHorizontalBinOp(LHS, RHS, false))
return DAG.getNode(X86ISD::FHSUB, N->getDebugLoc(), VT, LHS, RHS);
return SDValue();
@@ -14783,7 +14490,8 @@ static SDValue PerformAddCombine(SDNode *N, SelectionDAG &DAG,
SDValue Op1 = N->getOperand(1);
// Try to synthesize horizontal adds from adds of shuffles.
- if ((Subtarget->hasSSSE3orAVX()) && (VT == MVT::v8i16 || VT == MVT::v4i32) &&
+ if (((Subtarget->hasSSSE3orAVX() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
+ (Subtarget->hasAVX2() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
isHorizontalBinOp(Op0, Op1, true))
return DAG.getNode(X86ISD::HADD, N->getDebugLoc(), VT, Op0, Op1);
@@ -14815,8 +14523,9 @@ static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG,
// Try to synthesize horizontal subs from subs of shuffles.
EVT VT = N->getValueType(0);
- if ((Subtarget->hasSSSE3orAVX()) && (VT == MVT::v8i16 || VT == MVT::v4i32) &&
- isHorizontalBinOp(Op0, Op1, false))
+ if (((Subtarget->hasSSSE3orAVX() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
+ (Subtarget->hasAVX2() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
+ isHorizontalBinOp(Op0, Op1, false))
return DAG.getNode(X86ISD::HSUB, N->getDebugLoc(), VT, Op0, Op1);
return OptimizeConditionalInDecrement(N, DAG);
@@ -14857,18 +14566,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::SHUFPS: // Handle all target specific shuffles
case X86ISD::SHUFPD:
case X86ISD::PALIGN:
- case X86ISD::PUNPCKHBW:
- case X86ISD::PUNPCKHWD:
- case X86ISD::PUNPCKHDQ:
- case X86ISD::PUNPCKHQDQ:
- case X86ISD::UNPCKHPS:
- case X86ISD::UNPCKHPD:
- case X86ISD::PUNPCKLBW:
- case X86ISD::PUNPCKLWD:
- case X86ISD::PUNPCKLDQ:
- case X86ISD::PUNPCKLQDQ:
- case X86ISD::UNPCKLPS:
- case X86ISD::UNPCKLPD:
+ case X86ISD::UNPCKH:
+ case X86ISD::UNPCKL:
case X86ISD::MOVHLPS:
case X86ISD::MOVLHPS:
case X86ISD::PSHUFD:
@@ -14876,11 +14575,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PSHUFLW:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
- case X86ISD::VPERMILPS:
- case X86ISD::VPERMILPSY:
- case X86ISD::VPERMILPD:
- case X86ISD::VPERMILPDY:
- case X86ISD::VPERM2F128:
+ case X86ISD::VPERMILP:
+ case X86ISD::VPERM2X128:
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
}
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index ccff3a5..cfc1f88 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -273,23 +273,10 @@ namespace llvm {
MOVLPD,
MOVSD,
MOVSS,
- UNPCKLPS,
- UNPCKLPD,
- UNPCKHPS,
- UNPCKHPD,
- PUNPCKLBW,
- PUNPCKLWD,
- PUNPCKLDQ,
- PUNPCKLQDQ,
- PUNPCKHBW,
- PUNPCKHWD,
- PUNPCKHDQ,
- PUNPCKHQDQ,
- VPERMILPS,
- VPERMILPSY,
- VPERMILPD,
- VPERMILPDY,
- VPERM2F128,
+ UNPCKL,
+ UNPCKH,
+ VPERMILP,
+ VPERM2X128,
VBROADCAST,
// VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
@@ -468,10 +455,6 @@ namespace llvm {
/// the specified VECTOR_SHUFFLE mask with PSHUFLW instruction.
unsigned getShufflePSHUFLWImmediate(SDNode *N);
- /// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
- /// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
- unsigned getShufflePALIGNRImmediate(SDNode *N);
-
/// getExtractVEXTRACTF128Immediate - Return the appropriate
/// immediate to extract the specified EXTRACT_SUBVECTOR index
/// with VEXTRACTF128 instructions.
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
index d868773..f443088 100644
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -58,3 +58,391 @@ let isAsmParserOnly = 1 in {
defm VFNMSUBPS : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps">;
defm VFNMSUBPD : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd">, VEX_W;
}
+
+//===----------------------------------------------------------------------===//
+// FMA4 - AMD 4 operand Fused Multiply-Add instructions
+//===----------------------------------------------------------------------===//
+
+
+multiclass fma4s<bits<8> opc, string OpcodeStr> {
+ def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, XOP_W;
+ def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, XOP_W;
+ def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>;
+
+}
+
+multiclass fma4p<bits<8> opc, string OpcodeStr> {
+ def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, XOP_W;
+ def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, XOP_W;
+ def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>;
+ def rrY : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, XOP_W;
+ def rmY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, f256mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, XOP_W;
+ def mrY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VFMADDSS4 : fma4s<0x6A, "vfmaddss">;
+ defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd">;
+ defm VFMADDPS4 : fma4p<0x68, "vfmaddps">;
+ defm VFMADDPD4 : fma4p<0x69, "vfmaddpd">;
+ defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss">;
+ defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd">;
+ defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps">;
+ defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd">;
+ defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss">;
+ defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd">;
+ defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps">;
+ defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd">;
+ defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss">;
+ defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd">;
+ defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps">;
+ defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd">;
+ defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps">;
+ defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd">;
+ defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps">;
+ defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd">;
+}
+
+// FMA4 Intrinsics patterns
+
+// VFMADD
+def : Pat<(int_x86_fma4_vfmadd_ss VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMADDSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_ss VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFMADDSS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_ss VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFMADDSS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMADDSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFMADDSD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFMADDSD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmadd_ps VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMADDPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_ps VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFMADDPS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_ps VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFMADDPS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmadd_pd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMADDPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_pd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFMADDPD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_pd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFMADDPD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmadd_ps_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFMADDPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_ps_256 VR256:$src1, VR256:$src2,
+ (alignedloadv8f32 addr:$src3)),
+ (VFMADDPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_ps_256 VR256:$src1,
+ (alignedloadv8f32 addr:$src2),
+ VR256:$src3),
+ (VFMADDPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmadd_pd_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFMADDPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_pd_256 VR256:$src1, VR256:$src2,
+ (alignedloadv4f64 addr:$src3)),
+ (VFMADDPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_pd_256 VR256:$src1,
+ (alignedloadv4f64 addr:$src2),
+ VR256:$src3),
+ (VFMADDPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+// VFMSUB
+def : Pat<(int_x86_fma4_vfmsub_ss VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMSUBSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_ss VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFMSUBSS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_ss VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFMSUBSS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmsub_sd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMSUBSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_sd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFMSUBSD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_sd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFMSUBSD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmsub_ps VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMSUBPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_ps VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFMSUBPS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_ps VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFMSUBPS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmsub_pd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMSUBPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_pd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFMSUBPD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_pd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFMSUBPD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmsub_ps_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFMSUBPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_ps_256 VR256:$src1, VR256:$src2,
+ (alignedloadv8f32 addr:$src3)),
+ (VFMSUBPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_ps_256 VR256:$src1,
+ (alignedloadv8f32 addr:$src2),
+ VR256:$src3),
+ (VFMSUBPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmsub_pd_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFMSUBPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_pd_256 VR256:$src1, VR256:$src2,
+ (alignedloadv4f64 addr:$src3)),
+ (VFMSUBPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_pd_256 VR256:$src1,
+ (alignedloadv4f64 addr:$src2),
+ VR256:$src3),
+ (VFMSUBPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+// VFNMADD
+def : Pat<(int_x86_fma4_vfnmadd_ss VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFNMADDSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_ss VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFNMADDSS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_ss VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFNMADDSS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmadd_sd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFNMADDSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_sd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFNMADDSD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_sd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFNMADDSD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmadd_ps VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFNMADDPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_ps VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFNMADDPS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_ps VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFNMADDPS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmadd_pd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFNMADDPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_pd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFNMADDPD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_pd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFNMADDPD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmadd_ps_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFNMADDPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_ps_256 VR256:$src1, VR256:$src2,
+ (alignedloadv8f32 addr:$src3)),
+ (VFNMADDPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_ps_256 VR256:$src1,
+ (alignedloadv8f32 addr:$src2),
+ VR256:$src3),
+ (VFNMADDPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmadd_pd_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFNMADDPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_pd_256 VR256:$src1, VR256:$src2,
+ (alignedloadv4f64 addr:$src3)),
+ (VFNMADDPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_pd_256 VR256:$src1,
+ (alignedloadv4f64 addr:$src2),
+ VR256:$src3),
+ (VFNMADDPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+// VFNMSUB
+def : Pat<(int_x86_fma4_vfnmsub_ss VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFNMSUBSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_ss VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFNMSUBSS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_ss VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFNMSUBSS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmsub_sd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFNMSUBSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_sd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFNMSUBSD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_sd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFNMSUBSD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmsub_ps VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFNMSUBPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_ps VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFNMSUBPS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_ps VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFNMSUBPS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmsub_pd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFNMSUBPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_pd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFNMSUBPD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_pd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFNMSUBPD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmsub_ps_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFNMSUBPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_ps_256 VR256:$src1, VR256:$src2,
+ (alignedloadv8f32 addr:$src3)),
+ (VFNMSUBPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_ps_256 VR256:$src1,
+ (alignedloadv8f32 addr:$src2),
+ VR256:$src3),
+ (VFNMSUBPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmsub_pd_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFNMSUBPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_pd_256 VR256:$src1, VR256:$src2,
+ (alignedloadv4f64 addr:$src3)),
+ (VFNMSUBPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_pd_256 VR256:$src1,
+ (alignedloadv4f64 addr:$src2),
+ VR256:$src3),
+ (VFNMSUBPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+// VFMADDSUB
+def : Pat<(int_x86_fma4_vfmaddsub_ps VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMADDSUBPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmaddsub_ps VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFMADDSUBPS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmaddsub_ps VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFMADDSUBPS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmaddsub_pd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMADDSUBPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmaddsub_pd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFMADDSUBPD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmaddsub_pd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFMADDSUBPD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmaddsub_ps_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFMADDSUBPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfmaddsub_ps_256 VR256:$src1, VR256:$src2,
+ (alignedloadv8f32 addr:$src3)),
+ (VFMADDSUBPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmaddsub_ps_256 VR256:$src1,
+ (alignedloadv8f32 addr:$src2),
+ VR256:$src3),
+ (VFMADDSUBPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmaddsub_pd_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFMADDSUBPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfmaddsub_pd_256 VR256:$src1, VR256:$src2,
+ (alignedloadv4f64 addr:$src3)),
+ (VFMADDSUBPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmaddsub_pd_256 VR256:$src1,
+ (alignedloadv4f64 addr:$src2),
+ VR256:$src3),
+ (VFMADDSUBPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+// VFMSUBADD
+def : Pat<(int_x86_fma4_vfmsubadd_ps VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMSUBADDPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmsubadd_ps VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFMSUBADDPS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsubadd_ps VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFMSUBADDPS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmsubadd_pd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMSUBADDPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmsubadd_pd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFMSUBADDPD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsubadd_pd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFMSUBADDPD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmsubadd_ps_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFMSUBADDPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfmsubadd_ps_256 VR256:$src1, VR256:$src2,
+ (alignedloadv8f32 addr:$src3)),
+ (VFMSUBADDPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsubadd_ps_256 VR256:$src1,
+ (alignedloadv8f32 addr:$src2),
+ VR256:$src3),
+ (VFMSUBADDPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmsubadd_pd_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFMSUBADDPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfmsubadd_pd_256 VR256:$src1, VR256:$src2,
+ (alignedloadv4f64 addr:$src3)),
+ (VFMSUBADDPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsubadd_pd_256 VR256:$src1,
+ (alignedloadv4f64 addr:$src2),
+ VR256:$src3),
+ (VFMSUBADDPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index ecd6a93..7ba3639 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -110,6 +110,8 @@ class A7 { bits<5> Prefix = 16; }
class T8XD { bits<5> Prefix = 17; }
class T8XS { bits<5> Prefix = 18; }
class TAXD { bits<5> Prefix = 19; }
+class XOP8 { bits<5> Prefix = 20; }
+class XOP9 { bits<5> Prefix = 21; }
class VEX { bit hasVEXPrefix = 1; }
class VEX_W { bit hasVEX_WPrefix = 1; }
class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
@@ -118,7 +120,8 @@ class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
class VEX_L { bit hasVEX_L = 1; }
class VEX_LIG { bit ignoresVEX_L = 1; }
class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; }
-
+class XOP_W { bit hasXOP_WPrefix = 1; }
+class XOP { bit hasXOP_Prefix = 1; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
string AsmStr, Domain d = GenericDomain>
: Instruction {
@@ -158,6 +161,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit hasVEX_L = 0; // Does this inst use large (256-bit) registers?
bit ignoresVEX_L = 0; // Does this instruction ignore the L-bit
bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding?
+ bit hasXOP_WPrefix = 0; // Same bit as VEX_W, but used for swapping operands
+ bit hasXOP_Prefix = 0; // Does this inst require an XOP prefix?
// TSFlags layout should be kept in sync with X86InstrInfo.h.
let TSFlags{5-0} = FormBits;
@@ -179,6 +184,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{38} = hasVEX_L;
let TSFlags{39} = ignoresVEX_L;
let TSFlags{40} = has3DNow0F0FOpcode;
+ let TSFlags{41} = hasXOP_WPrefix;
+ let TSFlags{42} = hasXOP_Prefix;
}
class PseudoI<dag oops, dag iops, list<dag> pattern>
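The two new TSFlags bits are the hook the C++ emitter side would test when producing XOP encodings; a hedged sketch (the shift constants and helper names below are illustrative only, not the actual X86InstrInfo.h/X86BaseInfo.h definitions) of reading them back:

#include <cstdint>

// Mirrors "let TSFlags{41}" and "let TSFlags{42}" above; names are invented here.
constexpr unsigned XOP_WShift = 41;
constexpr unsigned XOPShift   = 42;

inline bool hasXOP_WPrefix(uint64_t TSFlags) {
  return (TSFlags >> XOP_WShift) & 1;  // same role as VEX_W, reused to swap operands
}
inline bool hasXOPPrefix(uint64_t TSFlags) {
  return (TSFlags >> XOPShift) & 1;    // instruction requires the XOP prefix byte
}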
@@ -332,6 +339,10 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, TB,
Requires<[HasAVX]>;
+class VoPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
+ Requires<[HasXMM]>;
// SSE2 Instruction Templates:
//
@@ -496,6 +507,30 @@ class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
OpSize, VEX_4V, Requires<[HasFMA3]>;
+// FMA4 Instruction Templates
+class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ OpSize, VEX_4V, VEX_I8IMM, Requires<[HasFMA4]>;
+
+// XOP 2, 3 and 4 Operand Instruction Template
+class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>,
+ XOP, XOP9, Requires<[HasXOP]>;
+
+// XOP 2, 3 and 4 Operand Instruction Templates with imm byte
+class IXOPi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>,
+ XOP, XOP8, Requires<[HasXOP]>;
+
+// XOP 5 operand instruction (VEX encoding!)
+class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ OpSize, VEX_4V, VEX_I8IMM, Requires<[HasXOP]>;
+
// X86-64 Instruction templates...
//
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 791bbe6..cd13bc4 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -130,28 +130,12 @@ def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>;
def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
-def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
-def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
+def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>;
+def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
-def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
-def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
+def X86VPermilp : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>;
-def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>;
-def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>;
-def X86Punpckldq : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>;
-def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>;
-
-def X86Punpckhbw : SDNode<"X86ISD::PUNPCKHBW", SDTShuff2Op>;
-def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>;
-def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>;
-def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>;
-
-def X86VPermilps : SDNode<"X86ISD::VPERMILPS", SDTShuff2OpI>;
-def X86VPermilpsy : SDNode<"X86ISD::VPERMILPSY", SDTShuff2OpI>;
-def X86VPermilpd : SDNode<"X86ISD::VPERMILPD", SDTShuff2OpI>;
-def X86VPermilpdy : SDNode<"X86ISD::VPERMILPDY", SDTShuff2OpI>;
-
-def X86VPerm2f128 : SDNode<"X86ISD::VPERM2F128", SDTShuff3OpI>;
+def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
@@ -363,12 +347,6 @@ def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
}]>;
-// SHUFFLE_get_palign_imm xform function: convert vector_shuffle mask to
-// a PALIGNR imm.
-def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{
- return getI8Imm(X86::getShufflePALIGNRImmediate(N));
-}]>;
-
// EXTRACT_get_vextractf128_imm xform function: convert extract_subvector index
// to VEXTRACTF128 imm.
def EXTRACT_get_vextractf128_imm : SDNodeXForm<extract_subvector, [{
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 24c4a53..7d1b9a1 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -1528,9 +1528,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
// Build and insert into an implicit UNDEF value. This is OK because
// well be shifting and then extracting the lower 16-bits.
- BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
+ BuildMI(*MFI, &*MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF),leaInReg2);
InsMI2 =
- BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
+ BuildMI(*MFI, &*MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
.addReg(leaInReg2, RegState::Define, X86::sub_16bit)
.addReg(Src2, getKillRegState(isKill2));
addRegReg(MIB, leaInReg, true, leaInReg2, true);
@@ -2040,13 +2040,12 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
}
bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isTerminator()) return false;
+ if (!MI->isTerminator()) return false;
// Conditional branch is a special case.
- if (MCID.isBranch() && !MCID.isBarrier())
+ if (MI->isBranch() && !MI->isBarrier())
return true;
- if (!MCID.isPredicable())
+ if (!MI->isPredicable())
return true;
return !isPredicated(MI);
}
@@ -2072,7 +2071,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// A terminator that isn't a branch can't easily be handled by this
// analysis.
- if (!I->getDesc().isBranch())
+ if (!I->isBranch())
return true;
// Handle unconditional branches.
@@ -2556,6 +2555,8 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
switch (MI->getOpcode()) {
case X86::V_SET0:
+ case X86::FsFLD0SS:
+ case X86::FsFLD0SD:
return Expand2AddrUndef(MI, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
case X86::TEST8ri_NOREX:
MI->setDesc(get(X86::TEST8ri));
@@ -2771,7 +2772,9 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
case X86::RCPSSr:
case X86::RCPSSr_Int:
case X86::ROUNDSDr:
+ case X86::ROUNDSDr_Int:
case X86::ROUNDSSr:
+ case X86::ROUNDSSr_Int:
case X86::RSQRTSSr:
case X86::RSQRTSSr_Int:
case X86::SQRTSSr:
@@ -2783,7 +2786,9 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
case X86::Int_VCVTSS2SDrr:
case X86::VRCPSSr:
case X86::VROUNDSDr:
+ case X86::VROUNDSDr_Int:
case X86::VROUNDSSr:
+ case X86::VROUNDSSr_Int:
case X86::VRSQRTSSr:
case X86::VSQRTSSr:
return true;
@@ -2911,11 +2916,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
Alignment = 16;
break;
case X86::FsFLD0SD:
- case X86::VFsFLD0SD:
Alignment = 8;
break;
case X86::FsFLD0SS:
- case X86::VFsFLD0SS:
Alignment = 4;
break;
default:
@@ -2950,9 +2953,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
case X86::AVX_SETALLONES:
case X86::AVX2_SETALLONES:
case X86::FsFLD0SD:
- case X86::FsFLD0SS:
- case X86::VFsFLD0SD:
- case X86::VFsFLD0SS: {
+ case X86::FsFLD0SS: {
// Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
// Create a constant-pool entry and operands to load from it.
@@ -2978,9 +2979,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineConstantPool &MCP = *MF.getConstantPool();
Type *Ty;
unsigned Opc = LoadMI->getOpcode();
- if (Opc == X86::FsFLD0SS || Opc == X86::VFsFLD0SS)
+ if (Opc == X86::FsFLD0SS)
Ty = Type::getFloatTy(MF.getFunction()->getContext());
- else if (Opc == X86::FsFLD0SD || Opc == X86::VFsFLD0SD)
+ else if (Opc == X86::FsFLD0SD)
Ty = Type::getDoubleTy(MF.getFunction()->getContext());
else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
@@ -3569,7 +3570,13 @@ static const unsigned ReplaceableInstrsAVX2[][3] = {
{ X86::VORPSYrm, X86::VORPDYrm, X86::VPORYrm },
{ X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr },
{ X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm },
- { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr }
+ { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr },
+ { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
+ { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr },
+ { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm },
+ { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr },
+ { X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm },
+ { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr }
};
// FIXME: Some shuffle and unpack instructions have equivalents in different
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 35631d5..0bc3afa 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -473,6 +473,7 @@ def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
def HasAVX : Predicate<"Subtarget->hasAVX()">;
def HasAVX2 : Predicate<"Subtarget->hasAVX2()">;
+def HasXMM : Predicate<"Subtarget->hasXMM()">;
def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
@@ -480,6 +481,7 @@ def HasAES : Predicate<"Subtarget->hasAES()">;
def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">;
def HasFMA3 : Predicate<"Subtarget->hasFMA3()">;
def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
+def HasXOP : Predicate<"Subtarget->hasXOP()">;
def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">;
def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">;
def HasF16C : Predicate<"Subtarget->hasF16C()">;
@@ -1502,6 +1504,9 @@ include "X86InstrFragmentsSIMD.td"
// FMA - Fused Multiply-Add support (requires FMA)
include "X86InstrFMA.td"
+// XOP
+include "X86InstrXOP.td"
+
// SSE, MMX and 3DNow! vector support.
include "X86InstrSSE.td"
include "X86InstrMMX.td"
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 7cadac1..345f606 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -240,21 +240,13 @@ let Predicates = [HasAVX] in {
}
// Alias instructions that map fld0 to pxor for sse.
-// FIXME: Set encoding to pseudo!
-let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
- canFoldAsLoad = 1 in {
- def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
- [(set FR32:$dst, fp32imm0)]>,
- Requires<[HasSSE1]>, TB, OpSize;
- def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
- [(set FR64:$dst, fpimm0)]>,
- Requires<[HasSSE2]>, TB, OpSize;
- def VFsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
- [(set FR32:$dst, fp32imm0)]>,
- Requires<[HasAVX]>, TB, OpSize, VEX_4V;
- def VFsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
- [(set FR64:$dst, fpimm0)]>,
- Requires<[HasAVX]>, TB, OpSize, VEX_4V;
+// This is expanded by ExpandPostRAPseudos.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isPseudo = 1 in {
+ def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
+ [(set FR32:$dst, fp32imm0)]>, Requires<[HasXMM]>;
+ def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
+ [(set FR64:$dst, fpimm0)]>, Requires<[HasXMMInt]>;
}
//===----------------------------------------------------------------------===//
@@ -569,6 +561,16 @@ let Predicates = [HasAVX] in {
(EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
(VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
+
+ // Move low f32 and clear high bits.
+ def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSrr (v4f32 (V_SET0)),
+ (EXTRACT_SUBREG (v8f32 VR256:$src), sub_ss)), sub_xmm)>;
+ def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSrr (v4i32 (V_SET0)),
+ (EXTRACT_SUBREG (v8i32 VR256:$src), sub_ss)), sub_xmm)>;
}
let AddedComplexity = 20 in {
@@ -596,6 +598,9 @@ let Predicates = [HasAVX] in {
// Represent the same patterns above but in the form they appear for
// 256-bit types
+ def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
@@ -613,6 +618,15 @@ let Predicates = [HasAVX] in {
(SUBREG_TO_REG (i64 0),
(v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
sub_xmm)>;
+ def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))), (i32 0)))),
+ (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+
+ // Move low f64 and clear high bits.
+ def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDrr (v2f64 (V_SET0)),
+ (EXTRACT_SUBREG (v4f64 VR256:$src), sub_sd)), sub_xmm)>;
// Extract and store.
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
@@ -634,6 +648,16 @@ let Predicates = [HasAVX] in {
(VMOVSSrr (v4f32 VR128:$src1),
(EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+ // 256-bit variants
+ def : Pat<(v8i32 (X86Movsd VR256:$src1, VR256:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_ss),
+ (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_ss)), sub_xmm)>;
+ def : Pat<(v8f32 (X86Movsd VR256:$src1, VR256:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_ss),
+ (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_ss)), sub_xmm)>;
+
// Shuffle with VMOVSD
def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
(VMOVSDrr VR128:$src1, FR64:$src2)>;
@@ -650,6 +674,17 @@ let Predicates = [HasAVX] in {
(VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),
sub_sd))>;
+ // 256-bit variants
+ def : Pat<(v4i64 (X86Movsd VR256:$src1, VR256:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_sd),
+ (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_sd)), sub_xmm)>;
+ def : Pat<(v4f64 (X86Movsd VR256:$src1, VR256:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_sd),
+ (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_sd)), sub_xmm)>;
+
+
// FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
// is during lowering, where it's not possible to recognize the fold cause
// it has two uses through a bitcast. One use disappears at isel time and the
@@ -657,6 +692,9 @@ let Predicates = [HasAVX] in {
def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
(VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),
sub_sd))>;
+ def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),
+ sub_sd))>;
def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
(VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),
sub_sd))>;
@@ -761,6 +799,22 @@ let isCodeGenOnly = 1 in {
"movupd\t{$src, $dst|$dst, $src}", []>, VEX;
}
+let Predicates = [HasAVX] in {
+def : Pat<(v8i32 (X86vzmovl
+ (insert_subvector undef, (v4i32 VR128:$src), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
+def : Pat<(v4i64 (X86vzmovl
+ (insert_subvector undef, (v2i64 VR128:$src), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
+def : Pat<(v8f32 (X86vzmovl
+ (insert_subvector undef, (v4f32 VR128:$src), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
+def : Pat<(v4f64 (X86vzmovl
+ (insert_subvector undef, (v2f64 VR128:$src), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
+}
+
+
def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>;
def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
(VMOVUPSYmr addr:$dst, VR256:$src)>;
@@ -1156,14 +1210,17 @@ let Predicates = [HasAVX] in {
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
(VMOVHPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
+ (VMOVHPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Movlhps VR128:$src1,
(bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
(VMOVHPSrm VR128:$src1, addr:$src2)>;
- // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem
+ // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
// is during lowering, where it's not possible to recognize the load fold cause
// it has two uses through a bitcast. One use disappears at isel time and the
// fold opportunity reappears.
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))),
(VMOVHPDrm VR128:$src1, addr:$src2)>;
@@ -1174,10 +1231,10 @@ let Predicates = [HasAVX] in {
// Store patterns
def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst),
+ (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst),
(VMOVHPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))), addr:$dst),
+ (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst),
(VMOVHPDmr addr:$dst, VR128:$src)>;
}
@@ -1189,21 +1246,24 @@ let Predicates = [HasSSE1] in {
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
(MOVHPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Movlhps VR128:$src1,
(bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
(MOVHPSrm VR128:$src1, addr:$src2)>;
// Store patterns
def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst),
+ (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst),
(MOVHPSmr addr:$dst, VR128:$src)>;
}
let Predicates = [HasSSE2] in {
- // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem
+ // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
// is during lowering, where it's not possible to recognize the load fold cause
// it has two uses through a bitcast. One use disappears at isel time and the
// fold opportunity reappears.
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))),
(MOVHPDrm VR128:$src1, addr:$src2)>;
@@ -1214,7 +1274,7 @@ let Predicates = [HasSSE2] in {
// Store patterns
def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst),
+ (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))),addr:$dst),
(MOVHPDmr addr:$dst, VR128:$src)>;
}
@@ -1943,7 +2003,7 @@ def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
// whenever possible to avoid declaring two versions of each one.
def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
(VCVTDQ2PSYrr VR256:$src)>;
-def : Pat<(int_x86_avx_cvtdq2_ps_256 (memopv8i32 addr:$src)),
+def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))),
(VCVTDQ2PSYrm addr:$src)>;
def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src),
@@ -2430,27 +2490,27 @@ let AddedComplexity = 10 in {
} // AddedComplexity
let Predicates = [HasSSE1] in {
- def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+ def : Pat<(v4f32 (X86Unpckl VR128:$src1, (memopv4f32 addr:$src2))),
(UNPCKLPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+ def : Pat<(v4f32 (X86Unpckl VR128:$src1, VR128:$src2)),
(UNPCKLPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+ def : Pat<(v4f32 (X86Unpckh VR128:$src1, (memopv4f32 addr:$src2))),
(UNPCKHPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+ def : Pat<(v4f32 (X86Unpckh VR128:$src1, VR128:$src2)),
(UNPCKHPSrr VR128:$src1, VR128:$src2)>;
}
let Predicates = [HasSSE2] in {
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1, (memopv2f64 addr:$src2))),
(UNPCKLPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1, VR128:$src2)),
(UNPCKLPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+ def : Pat<(v2f64 (X86Unpckh VR128:$src1, (memopv2f64 addr:$src2))),
(UNPCKHPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+ def : Pat<(v2f64 (X86Unpckh VR128:$src1, VR128:$src2)),
(UNPCKHPDrr VR128:$src1, VR128:$src2)>;
- // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the
+ // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
// problem is during lowering, where it's not possible to recognize the load
// fold cause it has two uses through a bitcast. One use disappears at isel
// time and the fold opportunity reappears.
@@ -2463,59 +2523,43 @@ let Predicates = [HasSSE2] in {
}
let Predicates = [HasAVX] in {
- def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+ def : Pat<(v4f32 (X86Unpckl VR128:$src1, (memopv4f32 addr:$src2))),
(VUNPCKLPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+ def : Pat<(v4f32 (X86Unpckl VR128:$src1, VR128:$src2)),
(VUNPCKLPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+ def : Pat<(v4f32 (X86Unpckh VR128:$src1, (memopv4f32 addr:$src2))),
(VUNPCKHPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+ def : Pat<(v4f32 (X86Unpckh VR128:$src1, VR128:$src2)),
(VUNPCKHPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8f32 (X86Unpcklps VR256:$src1, (memopv8f32 addr:$src2))),
+ def : Pat<(v8f32 (X86Unpckl VR256:$src1, (memopv8f32 addr:$src2))),
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8f32 (X86Unpcklps VR256:$src1, VR256:$src2)),
- (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpcklps VR256:$src1, VR256:$src2)),
+ def : Pat<(v8f32 (X86Unpckl VR256:$src1, VR256:$src2)),
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpcklps VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
- (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8f32 (X86Unpckhps VR256:$src1, (memopv8f32 addr:$src2))),
- (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8f32 (X86Unpckhps VR256:$src1, VR256:$src2)),
- (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpckhps VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+ def : Pat<(v8f32 (X86Unpckh VR256:$src1, (memopv8f32 addr:$src2))),
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8i32 (X86Unpckhps VR256:$src1, VR256:$src2)),
+ def : Pat<(v8f32 (X86Unpckh VR256:$src1, VR256:$src2)),
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1, (memopv2f64 addr:$src2))),
(VUNPCKLPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1, VR128:$src2)),
(VUNPCKLPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+ def : Pat<(v2f64 (X86Unpckh VR128:$src1, (memopv2f64 addr:$src2))),
(VUNPCKHPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+ def : Pat<(v2f64 (X86Unpckh VR128:$src1, VR128:$src2)),
(VUNPCKHPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, (memopv4f64 addr:$src2))),
+ def : Pat<(v4f64 (X86Unpckl VR256:$src1, (memopv4f64 addr:$src2))),
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, VR256:$src2)),
+ def : Pat<(v4f64 (X86Unpckl VR256:$src1, VR256:$src2)),
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, (memopv4i64 addr:$src2))),
- (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, VR256:$src2)),
- (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, (memopv4f64 addr:$src2))),
- (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, VR256:$src2)),
- (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (X86Unpckhpd VR256:$src1, (memopv4i64 addr:$src2))),
+ def : Pat<(v4f64 (X86Unpckh VR256:$src1, (memopv4f64 addr:$src2))),
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4i64 (X86Unpckhpd VR256:$src1, VR256:$src2)),
+ def : Pat<(v4f64 (X86Unpckh VR256:$src1, VR256:$src2)),
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
- // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the
+ // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
// problem is during lowering, where it's not possible to recognize the load
// fold cause it has two uses through a bitcast. One use disappears at isel
// time and the fold opportunity reappears.
@@ -2869,7 +2913,7 @@ multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins ssmem:$src1, VR128:$src2),
+ (ins VR128:$src1, ssmem:$src2),
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
}
@@ -3198,13 +3242,13 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
//===----------------------------------------------------------------------===//
// Prefetch intrinsic.
-def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
+def PREFETCHT0 : VoPSI<0x18, MRM1m, (outs), (ins i8mem:$src),
"prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>;
-def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
+def PREFETCHT1 : VoPSI<0x18, MRM2m, (outs), (ins i8mem:$src),
"prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>;
-def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
+def PREFETCHT2 : VoPSI<0x18, MRM3m, (outs), (ins i8mem:$src),
"prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>;
-def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
+def PREFETCHNTA : VoPSI<0x18, MRM0m, (outs), (ins i8mem:$src),
"prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>;
// Flush cache
@@ -3652,6 +3696,8 @@ defm VPOR : PDI_binop_rm<0xEB, "vpor" , or, v2i64, VR128, memopv2i64,
i128mem, 1, 0>, VEX_4V;
defm VPXOR : PDI_binop_rm<0xEF, "vpxor", xor, v2i64, VR128, memopv2i64,
i128mem, 1, 0>, VEX_4V;
+defm VPANDN : PDI_binop_rm<0xDF, "vpandn", X86andnp, v2i64, VR128, memopv2i64,
+ i128mem, 0, 0>, VEX_4V;
let ExeDomain = SSEPackedInt in {
let neverHasSideEffects = 1 in {
@@ -3666,17 +3712,6 @@ let ExeDomain = SSEPackedInt in {
VEX_4V;
// PSRADQri doesn't exist in SSE[1-3].
}
- def VPANDNrr : PDI<0xDF, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst,
- (v2i64 (X86andnp VR128:$src1, VR128:$src2)))]>,VEX_4V;
-
- def VPANDNrm : PDI<0xDF, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (X86andnp VR128:$src1,
- (memopv2i64 addr:$src2)))]>, VEX_4V;
}
}
@@ -3714,6 +3749,8 @@ defm VPORY : PDI_binop_rm<0xEB, "vpor", or, v4i64, VR256, memopv4i64,
i256mem, 1, 0>, VEX_4V;
defm VPXORY : PDI_binop_rm<0xEF, "vpxor", xor, v4i64, VR256, memopv4i64,
i256mem, 1, 0>, VEX_4V;
+defm VPANDNY : PDI_binop_rm<0xDF, "vpandn", X86andnp, v4i64, VR256, memopv4i64,
+ i256mem, 0, 0>, VEX_4V;
let ExeDomain = SSEPackedInt in {
let neverHasSideEffects = 1 in {
@@ -3728,17 +3765,6 @@ let ExeDomain = SSEPackedInt in {
VEX_4V;
// PSRADQYri doesn't exist in SSE[1-3].
}
- def VPANDNYrr : PDI<0xDF, MRMSrcReg,
- (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
- "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR256:$dst,
- (v4i64 (X86andnp VR256:$src1, VR256:$src2)))]>,VEX_4V;
-
- def VPANDNYrm : PDI<0xDF, MRMSrcMem,
- (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
- "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR256:$dst, (X86andnp VR256:$src1,
- (memopv4i64 addr:$src2)))]>, VEX_4V;
}
}
@@ -3776,6 +3802,8 @@ defm POR : PDI_binop_rm<0xEB, "por" , or, v2i64, VR128, memopv2i64,
i128mem, 1>;
defm PXOR : PDI_binop_rm<0xEF, "pxor", xor, v2i64, VR128, memopv2i64,
i128mem, 1>;
+defm PANDN : PDI_binop_rm<0xDF, "pandn", X86andnp, v2i64, VR128, memopv2i64,
+ i128mem, 0>;
let ExeDomain = SSEPackedInt in {
let neverHasSideEffects = 1 in {
@@ -3787,14 +3815,6 @@ let ExeDomain = SSEPackedInt in {
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
"psrldq\t{$src2, $dst|$dst, $src2}", []>;
// PSRADQri doesn't exist in SSE[1-3].
- def PANDNrr : PDI<0xDF, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}", []>;
-
- let mayLoad = 1 in
- def PANDNrm : PDI<0xDF, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}", []>;
}
}
} // Constraints = "$src1 = $dst"
@@ -4198,66 +4218,88 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
}
let Predicates = [HasAVX] in {
- defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpcklbw,
+ defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl,
bc_v16i8, 0>, VEX_4V;
- defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Punpcklwd,
+ defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl,
bc_v8i16, 0>, VEX_4V;
- defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckldq,
+ defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl,
bc_v4i32, 0>, VEX_4V;
- defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Punpcklqdq,
+ defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl,
bc_v2i64, 0>, VEX_4V;
- defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckhbw,
+ defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh,
bc_v16i8, 0>, VEX_4V;
- defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Punpckhwd,
+ defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh,
bc_v8i16, 0>, VEX_4V;
- defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckhdq,
+ defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh,
bc_v4i32, 0>, VEX_4V;
- defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Punpckhqdq,
+ defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh,
bc_v2i64, 0>, VEX_4V;
}
let Predicates = [HasAVX2] in {
- defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpcklbw,
+ defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl,
bc_v32i8>, VEX_4V;
- defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpcklwd,
+ defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl,
bc_v16i16>, VEX_4V;
- defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckldq,
+ defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl,
bc_v8i32>, VEX_4V;
- defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Punpcklqdq,
+ defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl,
bc_v4i64>, VEX_4V;
- defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckhbw,
+ defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh,
bc_v32i8>, VEX_4V;
- defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckhwd,
+ defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh,
bc_v16i16>, VEX_4V;
- defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckhdq,
+ defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh,
bc_v8i32>, VEX_4V;
- defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Punpckhqdq,
+ defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh,
bc_v4i64>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
- defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw,
+ defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl,
bc_v16i8>;
- defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd,
+ defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl,
bc_v8i16>;
- defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq,
+ defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl,
bc_v4i32>;
- defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Punpcklqdq,
+ defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl,
bc_v2i64>;
- defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw,
+ defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh,
bc_v16i8>;
- defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd,
+ defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh,
bc_v8i16>;
- defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq,
+ defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh,
bc_v4i32>;
- defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Punpckhqdq,
+ defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh,
bc_v2i64>;
}
} // ExeDomain = SSEPackedInt
+// Patterns for using AVX1 instructions with integer vectors
+// Here to give AVX2 priority
+let Predicates = [HasAVX] in {
+ def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+ (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
+ (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+ (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
+ (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))),
+ (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
+ (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))),
+ (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
+ (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
+}
+
// Splat v2f64 / v2i64
let AddedComplexity = 10 in {
def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
@@ -4784,7 +4826,7 @@ def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
// AVX 256-bit register conversion intrinsics
def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src),
(VCVTDQ2PDYrr VR128:$src)>;
-def : Pat<(int_x86_avx_cvtdq2_pd_256 (memopv4i32 addr:$src)),
+def : Pat<(int_x86_avx_cvtdq2_pd_256 (bitconvert (memopv2i64 addr:$src))),
(VCVTDQ2PDYrm addr:$src)>;
def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src),
@@ -4794,7 +4836,7 @@ def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)),
def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
(VCVTDQ2PDYrr VR128:$src)>;
-def : Pat<(v4f64 (sint_to_fp (memopv4i32 addr:$src))),
+def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
(VCVTDQ2PDYrm addr:$src)>;
//===---------------------------------------------------------------------===//
@@ -5085,7 +5127,7 @@ let Constraints = "$src1 = $dst" in {
/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag128, Intrinsic IntId128> {
+ Intrinsic IntId128> {
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
@@ -5097,12 +5139,12 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(IntId128
- (bitconvert (mem_frag128 addr:$src))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src))))]>, OpSize;
}
/// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag256, Intrinsic IntId256> {
+ Intrinsic IntId256> {
def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
@@ -5114,32 +5156,32 @@ multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst,
(IntId256
- (bitconvert (mem_frag256 addr:$src))))]>, OpSize;
+ (bitconvert (memopv4i64 addr:$src))))]>, OpSize;
}
let Predicates = [HasAVX] in {
- defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8,
+ defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb",
int_x86_ssse3_pabs_b_128>, VEX;
- defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv8i16,
+ defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw",
int_x86_ssse3_pabs_w_128>, VEX;
- defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv4i32,
+ defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd",
int_x86_ssse3_pabs_d_128>, VEX;
}
let Predicates = [HasAVX2] in {
- defm VPABSB : SS3I_unop_rm_int_y<0x1C, "vpabsb", memopv32i8,
+ defm VPABSB : SS3I_unop_rm_int_y<0x1C, "vpabsb",
int_x86_avx2_pabs_b>, VEX;
- defm VPABSW : SS3I_unop_rm_int_y<0x1D, "vpabsw", memopv16i16,
+ defm VPABSW : SS3I_unop_rm_int_y<0x1D, "vpabsw",
int_x86_avx2_pabs_w>, VEX;
- defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd", memopv8i32,
+ defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd",
int_x86_avx2_pabs_d>, VEX;
}
-defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8,
+defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb",
int_x86_ssse3_pabs_b_128>;
-defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv8i16,
+defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw",
int_x86_ssse3_pabs_w_128>;
-defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32,
+defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd",
int_x86_ssse3_pabs_d_128>;
//===---------------------------------------------------------------------===//
@@ -5148,8 +5190,7 @@ defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32,
/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag128, Intrinsic IntId128,
- bit Is2Addr = 1> {
+ Intrinsic IntId128, bit Is2Addr = 1> {
let isCommutable = 1 in
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
@@ -5165,11 +5206,11 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(IntId128 VR128:$src1,
- (bitconvert (mem_frag128 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
}
multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag256, Intrinsic IntId256> {
+ Intrinsic IntId256> {
let isCommutable = 1 in
def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2),
@@ -5181,94 +5222,94 @@ multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(IntId256 VR256:$src1,
- (bitconvert (mem_frag256 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
}
let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
- defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16,
+ defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw",
int_x86_ssse3_phadd_w_128, 0>, VEX_4V;
- defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv4i32,
+ defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd",
int_x86_ssse3_phadd_d_128, 0>, VEX_4V;
- defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv8i16,
+ defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
int_x86_ssse3_phadd_sw_128, 0>, VEX_4V;
- defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv8i16,
+ defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw",
int_x86_ssse3_phsub_w_128, 0>, VEX_4V;
- defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv4i32,
+ defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd",
int_x86_ssse3_phsub_d_128, 0>, VEX_4V;
- defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv8i16,
+ defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
int_x86_ssse3_phsub_sw_128, 0>, VEX_4V;
- defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv16i8,
+ defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw",
int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V;
- defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv16i8,
+ defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb",
int_x86_ssse3_pshuf_b_128, 0>, VEX_4V;
- defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv16i8,
+ defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb",
int_x86_ssse3_psign_b_128, 0>, VEX_4V;
- defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv8i16,
+ defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw",
int_x86_ssse3_psign_w_128, 0>, VEX_4V;
- defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv4i32,
+ defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd",
int_x86_ssse3_psign_d_128, 0>, VEX_4V;
}
-defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16,
+defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw",
int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V;
}
let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
- defm VPHADDW : SS3I_binop_rm_int_y<0x01, "vphaddw", memopv16i16,
+ defm VPHADDW : SS3I_binop_rm_int_y<0x01, "vphaddw",
int_x86_avx2_phadd_w>, VEX_4V;
- defm VPHADDD : SS3I_binop_rm_int_y<0x02, "vphaddd", memopv8i32,
+ defm VPHADDD : SS3I_binop_rm_int_y<0x02, "vphaddd",
int_x86_avx2_phadd_d>, VEX_4V;
- defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw", memopv16i16,
+ defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
int_x86_avx2_phadd_sw>, VEX_4V;
- defm VPHSUBW : SS3I_binop_rm_int_y<0x05, "vphsubw", memopv16i16,
+ defm VPHSUBW : SS3I_binop_rm_int_y<0x05, "vphsubw",
int_x86_avx2_phsub_w>, VEX_4V;
- defm VPHSUBD : SS3I_binop_rm_int_y<0x06, "vphsubd", memopv8i32,
+ defm VPHSUBD : SS3I_binop_rm_int_y<0x06, "vphsubd",
int_x86_avx2_phsub_d>, VEX_4V;
- defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw", memopv16i16,
+ defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw",
int_x86_avx2_phsub_sw>, VEX_4V;
- defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw", memopv32i8,
+ defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw",
int_x86_avx2_pmadd_ub_sw>, VEX_4V;
- defm VPSHUFB : SS3I_binop_rm_int_y<0x00, "vpshufb", memopv32i8,
+ defm VPSHUFB : SS3I_binop_rm_int_y<0x00, "vpshufb",
int_x86_avx2_pshuf_b>, VEX_4V;
- defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", memopv32i8,
+ defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb",
int_x86_avx2_psign_b>, VEX_4V;
- defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", memopv16i16,
+ defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw",
int_x86_avx2_psign_w>, VEX_4V;
- defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", memopv8i32,
+ defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd",
int_x86_avx2_psign_d>, VEX_4V;
}
-defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw", memopv16i16,
+defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw",
int_x86_avx2_pmul_hr_sw>, VEX_4V;
}
// None of these have i8 immediate fields.
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
- defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv8i16,
+ defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw",
int_x86_ssse3_phadd_w_128>;
- defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv4i32,
+ defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd",
int_x86_ssse3_phadd_d_128>;
- defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv8i16,
+ defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw",
int_x86_ssse3_phadd_sw_128>;
- defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv8i16,
+ defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw",
int_x86_ssse3_phsub_w_128>;
- defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv4i32,
+ defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd",
int_x86_ssse3_phsub_d_128>;
- defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv8i16,
+ defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw",
int_x86_ssse3_phsub_sw_128>;
- defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv16i8,
+ defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw",
int_x86_ssse3_pmadd_ub_sw_128>;
- defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv16i8,
+ defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb",
int_x86_ssse3_pshuf_b_128>;
- defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv16i8,
+ defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb",
int_x86_ssse3_psign_b_128>;
- defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv8i16,
+ defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw",
int_x86_ssse3_psign_w_128>;
- defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv4i32,
+ defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd",
int_x86_ssse3_psign_d_128>;
}
-defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16,
+defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw",
int_x86_ssse3_pmul_hr_sw_128>;
}
@@ -6017,8 +6058,18 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
Intrinsic F32Int,
Intrinsic F64Int, bit Is2Addr = 1> {
let ExeDomain = GenericDomain in {
- // Intrinsic operation, reg.
+ // Operation, reg.
def SSr : SS4AIi8<opcss, MRMSrcReg,
+ (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ []>, OpSize;
+
+ // Intrinsic operation, reg.
+ def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
@@ -6040,8 +6091,18 @@ let ExeDomain = GenericDomain in {
(F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
OpSize;
- // Intrinsic operation, reg.
+ // Operation, reg.
def SDr : SS4AIi8<opcsd, MRMSrcReg,
+ (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ []>, OpSize;
+
+ // Intrinsic operation, reg.
+ def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
@@ -6079,6 +6140,27 @@ let Predicates = [HasAVX] in {
defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
int_x86_sse41_round_ss,
int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;
+
+ def : Pat<(ffloor FR32:$src),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
+ def : Pat<(f64 (ffloor FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
+ def : Pat<(f32 (fnearbyint FR32:$src)),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
+ def : Pat<(f64 (fnearbyint FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
+ def : Pat<(f32 (fceil FR32:$src)),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
+ def : Pat<(f64 (fceil FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
+ def : Pat<(f32 (frint FR32:$src)),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
+ def : Pat<(f64 (frint FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
+ def : Pat<(f32 (ftrunc FR32:$src)),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
+ def : Pat<(f64 (ftrunc FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
}
defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
@@ -6088,6 +6170,27 @@ let Constraints = "$src1 = $dst" in
defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
+def : Pat<(ffloor FR32:$src),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
+def : Pat<(f64 (ffloor FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
+def : Pat<(f32 (fnearbyint FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
+def : Pat<(f64 (fnearbyint FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
+def : Pat<(f32 (fceil FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
+def : Pat<(f64 (fceil FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
+def : Pat<(f32 (frint FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
+def : Pat<(f64 (frint FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
+def : Pat<(f32 (ftrunc FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
+def : Pat<(f64 (ftrunc FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
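+// In the (V)ROUNDSS/(V)ROUNDSD patterns above, the i32 immediate is the
+// SSE4.1 rounding-control field: 0x1 rounds toward -inf (floor), 0x2 toward
+// +inf (ceil), 0x3 toward zero (trunc), 0x4 uses the current MXCSR rounding
+// mode (rint), and 0xC additionally suppresses the precision exception
+// (nearbyint).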
+
//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Bit Test
//===----------------------------------------------------------------------===//
@@ -6195,7 +6298,7 @@ multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(IntId128
- (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src))))]>, OpSize;
}
let Predicates = [HasAVX] in
@@ -6221,7 +6324,7 @@ multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
}
/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
@@ -6237,7 +6340,7 @@ multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(IntId256 VR256:$src1,
- (bitconvert (memopv32i8 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
}
let Predicates = [HasAVX] in {
@@ -6400,38 +6503,38 @@ let Predicates = [HasAVX] in {
let isCommutable = 0 in {
let ExeDomain = SSEPackedSingle in {
defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ VR128, memopv4f32, i128mem, 0>, VEX_4V;
defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
- int_x86_avx_blend_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ int_x86_avx_blend_ps_256, VR256, memopv8f32, i256mem, 0>, VEX_4V;
}
let ExeDomain = SSEPackedDouble in {
defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ VR128, memopv2f64, i128mem, 0>, VEX_4V;
defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
- int_x86_avx_blend_pd_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ int_x86_avx_blend_pd_256, VR256, memopv4f64, i256mem, 0>, VEX_4V;
}
defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ VR128, memopv2i64, i128mem, 0>, VEX_4V;
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ VR128, memopv2i64, i128mem, 0>, VEX_4V;
}
let ExeDomain = SSEPackedSingle in
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ VR128, memopv4f32, i128mem, 0>, VEX_4V;
let ExeDomain = SSEPackedDouble in
defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ VR128, memopv2f64, i128mem, 0>, VEX_4V;
let ExeDomain = SSEPackedSingle in
defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
- VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ VR256, memopv8f32, i256mem, 0>, VEX_4V;
}
let Predicates = [HasAVX2] in {
let isCommutable = 0 in {
defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw,
- VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ VR256, memopv4i64, i256mem, 0>, VEX_4V;
defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
- VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ VR256, memopv4i64, i256mem, 0>, VEX_4V;
}
}
@@ -6439,35 +6542,35 @@ let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
let ExeDomain = SSEPackedSingle in
defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv4f32, i128mem>;
let ExeDomain = SSEPackedDouble in
defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv2f64, i128mem>;
defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv2i64, i128mem>;
defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv2i64, i128mem>;
}
let ExeDomain = SSEPackedSingle in
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv4f32, i128mem>;
let ExeDomain = SSEPackedDouble in
defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv2f64, i128mem>;
}
/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators
multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
RegisterClass RC, X86MemOperand x86memop,
PatFrag mem_frag, Intrinsic IntId> {
- def rr : I<opc, MRMSrcReg, (outs RC:$dst),
+ def rr : Ii8<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
- def rm : I<opc, MRMSrcMem, (outs RC:$dst),
+ def rm : Ii8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
@@ -6480,23 +6583,23 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedDouble in {
defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem,
- memopv16i8, int_x86_sse41_blendvpd>;
+ memopv2f64, int_x86_sse41_blendvpd>;
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
- memopv32i8, int_x86_avx_blendv_pd_256>;
+ memopv4f64, int_x86_avx_blendv_pd_256>;
} // ExeDomain = SSEPackedDouble
let ExeDomain = SSEPackedSingle in {
defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem,
- memopv16i8, int_x86_sse41_blendvps>;
+ memopv4f32, int_x86_sse41_blendvps>;
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
- memopv32i8, int_x86_avx_blendv_ps_256>;
+ memopv8f32, int_x86_avx_blendv_ps_256>;
} // ExeDomain = SSEPackedSingle
defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
- memopv16i8, int_x86_sse41_pblendvb>;
+ memopv2i64, int_x86_sse41_pblendvb>;
}
let Predicates = [HasAVX2] in {
defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem,
- memopv32i8, int_x86_avx2_pblendvb>;
+ memopv4i64, int_x86_avx2_pblendvb>;
}
let Predicates = [HasAVX] in {
@@ -6537,7 +6640,8 @@ let Predicates = [HasAVX2] in {
/// SS41I_ternary_int - SSE 4.1 ternary operator
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
- multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
+ Intrinsic IntId> {
def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr,
@@ -6551,15 +6655,18 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
"\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst,
(IntId VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2)), XMM0))]>, OpSize;
+ (bitconvert (mem_frag addr:$src2)), XMM0))]>, OpSize;
}
}
let ExeDomain = SSEPackedDouble in
-defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
+defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memopv2f64,
+ int_x86_sse41_blendvpd>;
let ExeDomain = SSEPackedSingle in
-defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
-defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
+defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32,
+ int_x86_sse41_blendvps>;
+defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64,
+ int_x86_sse41_pblendvb>;
let Predicates = [HasSSE41] in {
def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1),
@@ -6614,8 +6721,7 @@ multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+ (IntId128 VR128:$src1, (memopv2i64 addr:$src2)))]>, OpSize;
}
/// SS42I_binop_rm_int - Simple SSE 4.2 binary operator
@@ -6630,8 +6736,7 @@ multiclass SS42I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
(ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
- (IntId256 VR256:$src1,
- (bitconvert (memopv32i8 addr:$src2))))]>, OpSize;
+ (IntId256 VR256:$src1, (memopv4i64 addr:$src2)))]>, OpSize;
}
let Predicates = [HasAVX] in {
@@ -6913,7 +7018,7 @@ multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
}
// Perform One Round of an AES Encryption/Decryption Flow
@@ -7144,7 +7249,7 @@ def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values
//
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR128:$src2, i8imm:$src3),
"vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
@@ -7163,35 +7268,10 @@ def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3),
(VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
-def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-
//===----------------------------------------------------------------------===//
// VEXTRACTF128 - Extract packed floating-point values
//
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
(ins VR256:$src1, i8imm:$src2),
"vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -7210,31 +7290,6 @@ def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
(VEXTRACTF128rr VR256:$src1, imm:$src2)>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v4f32 (VEXTRACTF128rr
- (v8f32 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v2f64 (VEXTRACTF128rr
- (v4f64 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v4i32 (VEXTRACTF128rr
- (v8i32 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v2i64 (VEXTRACTF128rr
- (v4i64 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v8i16 (VEXTRACTF128rr
- (v16i16 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v16i8 (VEXTRACTF128rr
- (v32i8 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-
//===----------------------------------------------------------------------===//
// VMASKMOV - Conditional SIMD Packed Loads and Stores
//
@@ -7288,7 +7343,8 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop_i:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (IntVar RC:$src1, (i_frag addr:$src2)))]>, VEX_4V;
+ [(set RC:$dst, (IntVar RC:$src1,
+ (bitconvert (i_frag addr:$src2))))]>, VEX_4V;
def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, i8imm:$src2),
@@ -7302,11 +7358,11 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
let ExeDomain = SSEPackedSingle in {
defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
- memopv4f32, memopv4i32,
+ memopv4f32, memopv2i64,
int_x86_avx_vpermilvar_ps,
int_x86_avx_vpermil_ps>;
defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
- memopv8f32, memopv8i32,
+ memopv8f32, memopv4i64,
int_x86_avx_vpermilvar_ps_256,
int_x86_avx_vpermil_ps_256>;
}
@@ -7321,19 +7377,28 @@ let ExeDomain = SSEPackedDouble in {
int_x86_avx_vpermil_pd_256>;
}
-def : Pat<(v8f32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))),
+def : Pat<(v8f32 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
(VPERMILPSYri VR256:$src1, imm:$imm)>;
-def : Pat<(v4f64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))),
+def : Pat<(v4f64 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
(VPERMILPDYri VR256:$src1, imm:$imm)>;
-def : Pat<(v8i32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))),
+def : Pat<(v8i32 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
(VPERMILPSYri VR256:$src1, imm:$imm)>;
-def : Pat<(v4i64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))),
+def : Pat<(v4i64 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
(VPERMILPDYri VR256:$src1, imm:$imm)>;
+def : Pat<(v8f32 (X86VPermilp (memopv8f32 addr:$src1), (i8 imm:$imm))),
+ (VPERMILPSYmi addr:$src1, imm:$imm)>;
+def : Pat<(v4f64 (X86VPermilp (memopv4f64 addr:$src1), (i8 imm:$imm))),
+ (VPERMILPDYmi addr:$src1, imm:$imm)>;
+def : Pat<(v8i32 (X86VPermilp (bc_v8i32 (memopv4i64 addr:$src1)),
+ (i8 imm:$imm))),
+ (VPERMILPSYmi addr:$src1, imm:$imm)>;
+def : Pat<(v4i64 (X86VPermilp (memopv4i64 addr:$src1), (i8 imm:$imm))),
+ (VPERMILPDYmi addr:$src1, imm:$imm)>;
//===----------------------------------------------------------------------===//
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
//
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
@@ -7359,22 +7424,9 @@ def : Pat<(int_x86_avx_vperm2f128_pd_256
VR256:$src1, (memopv4f64 addr:$src2), imm:$src3),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
def : Pat<(int_x86_avx_vperm2f128_si_256
- VR256:$src1, (memopv8i32 addr:$src2), imm:$src3),
+ VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)), imm:$src3),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
-def : Pat<(v8f32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v8i32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v4i64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v4f64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v32i8 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v16i16 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-
//===----------------------------------------------------------------------===//
// VZERO - Zero YMM registers
//
@@ -7451,9 +7503,9 @@ multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr,
let isCommutable = 0 in {
defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv2i64, i128mem>;
defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
- VR256, memopv32i8, i256mem>;
+ VR256, memopv4i64, i256mem>;
}
//===----------------------------------------------------------------------===//
@@ -7541,11 +7593,12 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
(ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (Int VR256:$src1, (mem_frag addr:$src2)))]>,
+ [(set VR256:$dst, (Int VR256:$src1,
+ (bitconvert (mem_frag addr:$src2))))]>,
VEX_4V;
}
-defm VPERMD : avx2_perm<0x36, "vpermd", memopv8i32, int_x86_avx2_permd>;
+defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, int_x86_avx2_permd>;
let ExeDomain = SSEPackedSingle in
defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, int_x86_avx2_permps>;
@@ -7571,7 +7624,7 @@ defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, int_x86_avx2_permpd>,
VEX_W;
//===----------------------------------------------------------------------===//
-// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
+// VPERM2I128 - Permute Integer Values in 128-bit chunks
//
def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
@@ -7587,6 +7640,64 @@ def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
imm:$src3))]>,
VEX_4V;
+let Predicates = [HasAVX2] in {
+def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+
+def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, (bc_v32i8 (memopv4i64 addr:$src2)),
+ (i8 imm:$imm))),
+ (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
+ (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)),
+ (i8 imm:$imm))),
+ (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2),
+ (i8 imm:$imm))),
+ (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
+}
+
+// AVX1 patterns
+def : Pat<(v8f32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+
+def : Pat<(v8f32 (X86VPerm2x128 VR256:$src1,
+ (memopv8f32 addr:$src2), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1,
+ (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1,
+ (memopv4i64 addr:$src2), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1,
+ (memopv4f64 addr:$src2), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1,
+ (bc_v32i8 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
+ (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+
//===----------------------------------------------------------------------===//
// VINSERTI128 - Insert packed integer values
//
@@ -7603,6 +7714,51 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
(int_x86_avx2_vinserti128 VR256:$src1, (memopv2i64 addr:$src2),
imm:$src3))]>, VEX_4V;
+let Predicates = [HasAVX2] in {
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTI128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTI128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
+ (i32 imm)),
+ (VINSERTI128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
+ (i32 imm)),
+ (VINSERTI128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+}
+
+// AVX1 patterns
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+
//===----------------------------------------------------------------------===//
// VEXTRACTI128 - Extract packed integer values
//
@@ -7617,6 +7773,51 @@ def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
(ins i128mem:$dst, VR256:$src1, i8imm:$src2),
"vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX;
+let Predicates = [HasAVX2] in {
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2i64 (VEXTRACTI128rr
+ (v4i64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4i32 (VEXTRACTI128rr
+ (v8i32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v8i16 (VEXTRACTI128rr
+ (v16i16 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v16i8 (VEXTRACTI128rr
+ (v32i8 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+}
+
+// AVX1 patterns
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4f32 (VEXTRACTF128rr
+ (v8f32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2f64 (VEXTRACTF128rr
+ (v4f64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2i64 (VEXTRACTF128rr
+ (v4i64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4i32 (VEXTRACTF128rr
+ (v8i32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v8i16 (VEXTRACTF128rr
+ (v16i16 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v16i8 (VEXTRACTF128rr
+ (v32i8 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+
//===----------------------------------------------------------------------===//
// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
//
diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td
new file mode 100644
index 0000000..64cc44d
--- /dev/null
+++ b/lib/Target/X86/X86InstrXOP.td
@@ -0,0 +1,243 @@
+//====- X86InstrXOP.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the XOP (eXtended OPerations) instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+multiclass xop2op<bits<8> opc, string OpcodeStr, X86MemOperand x86memop> {
+ def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>, VEX;
+ def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>, VEX;
+}
+
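+// These definitions carry no selection patterns; together with
+// isAsmParserOnly they only provide the assembler with the XOP mnemonics
+// and encodings.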
+let isAsmParserOnly = 1 in {
+ defm VPHSUBWD : xop2op<0xE2, "vphsubwd", f128mem>;
+ defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", f128mem>;
+ defm VPHSUBBW : xop2op<0xE1, "vphsubbw", f128mem>;
+ defm VPHADDWQ : xop2op<0xC7, "vphaddwq", f128mem>;
+ defm VPHADDWD : xop2op<0xC6, "vphaddwd", f128mem>;
+ defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", f128mem>;
+ defm VPHADDUWD : xop2op<0xD6, "vphadduwd", f128mem>;
+ defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", f128mem>;
+ defm VPHADDUBW : xop2op<0xD1, "vphaddubw", f128mem>;
+ defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", f128mem>;
+ defm VPHADDUBD : xop2op<0xD2, "vphaddubd", f128mem>;
+ defm VPHADDDQ : xop2op<0xCB, "vphadddq", f128mem>;
+ defm VPHADDBW : xop2op<0xC1, "vphaddbw", f128mem>;
+ defm VPHADDBQ : xop2op<0xC3, "vphaddbq", f128mem>;
+ defm VPHADDBD : xop2op<0xC2, "vphaddbd", f128mem>;
+ defm VFRCZSS : xop2op<0x82, "vfrczss", f32mem>;
+ defm VFRCZSD : xop2op<0x83, "vfrczsd", f64mem>;
+ defm VFRCZPS : xop2op<0x80, "vfrczps", f128mem>;
+ defm VFRCZPD : xop2op<0x81, "vfrczpd", f128mem>;
+}
+
+multiclass xop2op256<bits<8> opc, string OpcodeStr> {
+ def rrY : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>, VEX, VEX_L;
+ def rmY : IXOP<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+             []>, VEX, VEX_L;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VFRCZPS : xop2op256<0x80, "vfrczps">;
+ defm VFRCZPD : xop2op256<0x81, "vfrczpd">;
+}
+
+multiclass xop3op<bits<8> opc, string OpcodeStr> {
+ def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4VOp3;
+ def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_W;
+ def mr : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4VOp3;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPSHLW : xop3op<0x95, "vpshlw">;
+ defm VPSHLQ : xop3op<0x97, "vpshlq">;
+ defm VPSHLD : xop3op<0x96, "vpshld">;
+ defm VPSHLB : xop3op<0x94, "vpshlb">;
+ defm VPSHAW : xop3op<0x99, "vpshaw">;
+ defm VPSHAQ : xop3op<0x9B, "vpshaq">;
+ defm VPSHAD : xop3op<0x9A, "vpshad">;
+ defm VPSHAB : xop3op<0x98, "vpshab">;
+ defm VPROTW : xop3op<0x91, "vprotw">;
+ defm VPROTQ : xop3op<0x93, "vprotq">;
+ defm VPROTD : xop3op<0x92, "vprotd">;
+ defm VPROTB : xop3op<0x90, "vprotb">;
+}
+
+multiclass xop3opimm<bits<8> opc, string OpcodeStr> {
+ def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX;
+ def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPROTW : xop3opimm<0xC1, "vprotw">;
+ defm VPROTQ : xop3opimm<0xC3, "vprotq">;
+ defm VPROTD : xop3opimm<0xC2, "vprotd">;
+ defm VPROTB : xop3opimm<0xC0, "vprotb">;
+}
+
+// Instructions where the second source can be memory, but the third must be
+// a register
+multiclass xop4opm2<bits<8> opc, string OpcodeStr> {
+ def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V, VEX_I8IMM;
+ def rm : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V, VEX_I8IMM;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd">;
+ defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd">;
+ defm VPMACSWW : xop4opm2<0x95, "vpmacsww">;
+ defm VPMACSWD : xop4opm2<0x96, "vpmacswd">;
+ defm VPMACSSWW : xop4opm2<0x85, "vpmacssww">;
+ defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd">;
+ defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql">;
+ defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh">;
+ defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd">;
+ defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql">;
+ defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh">;
+ defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd">;
+}
+
+// Instructions where the second source can be memory and the third must be
+// an imm8
+multiclass xop4opimm<bits<8> opc, string OpcodeStr> {
+ def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V;
+ def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPCOMW : xop4opimm<0xCD, "vpcomw">;
+ defm VPCOMUW : xop4opimm<0xED, "vpcomuw">;
+ defm VPCOMUQ : xop4opimm<0xEF, "vpcomuq">;
+ defm VPCOMUD : xop4opimm<0xEE, "vpcomud">;
+ defm VPCOMUB : xop4opimm<0xEC, "vpcomub">;
+ defm VPCOMQ : xop4opimm<0xCF, "vpcomq">;
+ defm VPCOMD : xop4opimm<0xCE, "vpcomd">;
+ defm VPCOMB : xop4opimm<0xCC, "vpcomb">;
+}
+
+// Instructions where either the second or the third source can be memory
+multiclass xop4op<bits<8> opc, string OpcodeStr> {
+ def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V, VEX_I8IMM;
+ def rm : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V, VEX_I8IMM, XOP_W;
+ def mr : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V, VEX_I8IMM;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPPERM : xop4op<0xA3, "vpperm">;
+ defm VPCMOV : xop4op<0xA2, "vpcmov">;
+}
+
+multiclass xop4op256<bits<8> opc, string OpcodeStr> {
+ def rrY : IXOPi8<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V, VEX_I8IMM;
+ def rmY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, f256mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V, VEX_I8IMM, XOP_W;
+ def mrY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V, VEX_I8IMM;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPCMOV : xop4op256<0xA2, "vpcmov">;
+}
+
+multiclass xop5op<bits<8> opc, string OpcodeStr> {
+ def rr : IXOP5<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ []>;
+ def rm : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ []>, XOP_W;
+ def mr : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ []>;
+ def rrY : IXOP5<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ []>;
+ def rmY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, f256mem:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ []>, XOP_W;
+ def mrY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, VR256:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ []>;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPERMIL2PD : xop5op<0x49, "vpermil2pd">;
+ defm VPERMIL2PS : xop5op<0x48, "vpermil2ps">;
+}
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index 3f88fa6..2145a33 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -424,7 +424,9 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
TargetJITInfo::LazyResolverFn
X86JITInfo::getLazyResolverFunction(JITCompilerFn F) {
+ TsanIgnoreWritesBegin();
JITCompilerFunction = F;
+ TsanIgnoreWritesEnd();
#if defined (X86_32_JIT) && !defined (_MSC_VER)
if (Subtarget->hasSSE1())
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 81ee665..9232196 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -368,10 +368,6 @@ ReSimplify:
case X86::SETB_C64r: LowerUnaryToTwoAddr(OutMI, X86::SBB64rr); break;
case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break;
case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break;
- case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
- case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
- case X86::VFsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
- case X86::VFsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break;
case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break;
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index c1ac9f3..4e80432 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -452,7 +452,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return (RealignStack &&
+ return (MF.getTarget().Options.RealignStack &&
!MFI->hasVarSizedObjects());
}
@@ -583,7 +583,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// sure we restore the stack pointer immediately after the call, there may
// be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
MachineBasicBlock::iterator B = MBB.begin();
- while (I != B && !llvm::prior(I)->getDesc().isCall())
+ while (I != B && !llvm::prior(I)->isCall())
--I;
MBB.insert(I, New);
}
@@ -665,7 +665,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
case MVT::i8:
if (High) {
switch (Reg) {
- default: return 0;
+ default: return getX86SubSuperRegister(Reg, MVT::i64, High);
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AH;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -785,6 +785,22 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
return X86::R15D;
}
case MVT::i64:
+      // In 64-bit mode, if a "high" register was requested via the Q or r
+      // constraints, return one of these registers; otherwise just return
+      // the register name unchanged.
+ if (High) {
+ switch (Reg) {
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SP;
+ // Fallthrough.
+ }
+ }
switch (Reg) {
default: return Reg;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index e7bcbf8..6e092c7 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -273,6 +273,8 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
if (IsAMD && ((ECX >> 16) & 0x1)) {
HasFMA4 = true;
ToggleFeature(X86::FeatureFMA4);
+ HasXOP = true;
+ ToggleFeature(X86::FeatureXOP);
}
}
}
@@ -317,6 +319,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
, HasCLMUL(false)
, HasFMA3(false)
, HasFMA4(false)
+ , HasXOP(false)
, HasMOVBE(false)
, HasRDRAND(false)
, HasF16C(false)
@@ -387,9 +390,6 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
assert((!In64BitMode || HasX86_64) &&
"64-bit code requested on a subtarget that doesn't support it!");
- if(EnableSegmentedStacks && !isTargetELF())
- report_fatal_error("Segmented stacks are only implemented on ELF.");
-
// Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both
// 32 and 64 bit) and for all 64-bit targets.
if (StackAlignOverride)
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index e93f8e9..ccb9be0 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -93,6 +93,9 @@ protected:
/// HasFMA4 - Target has 4-operand fused multiply-add
bool HasFMA4;
+ /// HasXOP - Target has XOP instructions
+ bool HasXOP;
+
/// HasMOVBE - True if the processor has the MOVBE instruction.
bool HasMOVBE;
@@ -198,6 +201,7 @@ public:
bool hasCLMUL() const { return HasCLMUL; }
bool hasFMA3() const { return HasFMA3; }
bool hasFMA4() const { return HasFMA4; }
+ bool hasXOP() const { return HasXOP; }
bool hasMOVBE() const { return HasMOVBE; }
bool hasRDRAND() const { return HasRDRAND; }
bool hasF16C() const { return HasF16C; }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 1c9f3bd..126042e 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -31,9 +31,10 @@ extern "C" void LLVMInitializeX86Target() {
X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : X86TargetMachine(T, TT, CPU, FS, RM, CM, OL, false),
+ : X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false),
DataLayout(getSubtargetImpl()->isTargetDarwin() ?
"e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-"
"n8:16:32-S128" :
@@ -52,9 +53,10 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : X86TargetMachine(T, TT, CPU, FS, RM, CM, OL, true),
+ : X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true),
DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
"n8:16:32:64-S128"),
InstrInfo(*this),
@@ -67,11 +69,12 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
///
X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
- Subtarget(TT, CPU, FS, StackAlignmentOverride, is64Bit),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, Options.StackAlignmentOverride, is64Bit),
FrameLowering(*this, Subtarget),
ELFWriterInfo(is64Bit, true) {
// Determine the PICStyle based on the target selected.
@@ -95,8 +98,11 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
}
// default to hard float ABI
- if (FloatABIType == FloatABI::Default)
- FloatABIType = FloatABI::Hard;
+ if (Options.FloatABIType == FloatABI::Default)
+ this->Options.FloatABIType = FloatABI::Hard;
+
+ if (Options.EnableSegmentedStacks && !Subtarget.isTargetELF())
+ report_fatal_error("Segmented stacks are only implemented on ELF.");
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 64be458..3ac1769 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -38,7 +38,7 @@ class X86TargetMachine : public LLVMTargetMachine {
public:
X86TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool is64Bit);
@@ -85,7 +85,7 @@ class X86_32TargetMachine : public X86TargetMachine {
X86JITInfo JITInfo;
public:
X86_32TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
virtual const TargetData *getTargetData() const { return &DataLayout; }
@@ -113,7 +113,7 @@ class X86_64TargetMachine : public X86TargetMachine {
X86JITInfo JITInfo;
public:
X86_64TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
virtual const TargetData *getTargetData() const { return &DataLayout; }
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index 9bb54a8..f8c30eb 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -220,7 +220,7 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
MachineInstr *MI = I;
DebugLoc dl = I->getDebugLoc();
- bool isControlFlow = MI->getDesc().isCall() || MI->getDesc().isReturn();
+ bool isControlFlow = MI->isCall() || MI->isReturn();
// Shortcut: don't need to check regular instructions in dirty state.
if (!isControlFlow && CurState == ST_DIRTY)
diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
index d91da8c..de4abfc 100644
--- a/lib/Target/XCore/CMakeLists.txt
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -21,17 +21,5 @@ add_llvm_target(XCoreCodeGen
XCoreSelectionDAGInfo.cpp
)
-add_llvm_library_dependencies(LLVMXCoreCodeGen
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- LLVMXCoreDesc
- LLVMXCoreInfo
- )
-
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/XCore/LLVMBuild.txt b/lib/Target/XCore/LLVMBuild.txt
index 1f7e2d5..53b4a9e 100644
--- a/lib/Target/XCore/LLVMBuild.txt
+++ b/lib/Target/XCore/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = XCore
@@ -27,4 +30,3 @@ name = XCoreCodeGen
parent = XCore
required_libraries = AsmPrinter CodeGen Core MC SelectionDAG Support Target XCoreDesc XCoreInfo
add_to_library_groups = XCore
-
diff --git a/lib/Target/XCore/MCTargetDesc/CMakeLists.txt b/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
index 269822d..3a3f5b4 100644
--- a/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
@@ -3,11 +3,6 @@ add_llvm_library(LLVMXCoreDesc
XCoreMCAsmInfo.cpp
)
-add_llvm_library_dependencies(LLVMXCoreDesc
- LLVMMC
- LLVMXCoreInfo
- )
-
add_dependencies(LLVMXCoreDesc XCoreCommonTableGen)
# Hack: we need to include 'main' target directory to grab private headers
diff --git a/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
index 628afb5..a80c939 100644
--- a/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = XCoreDesc
parent = XCore
required_libraries = MC XCoreInfo
add_to_library_groups = XCore
-
diff --git a/lib/Target/XCore/TargetInfo/CMakeLists.txt b/lib/Target/XCore/TargetInfo/CMakeLists.txt
index 7f84f69..2c34b87 100644
--- a/lib/Target/XCore/TargetInfo/CMakeLists.txt
+++ b/lib/Target/XCore/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMXCoreInfo
XCoreTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMXCoreInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMXCoreInfo XCoreCommonTableGen)
diff --git a/lib/Target/XCore/TargetInfo/LLVMBuild.txt b/lib/Target/XCore/TargetInfo/LLVMBuild.txt
index d0b8e54..770ba87 100644
--- a/lib/Target/XCore/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/XCore/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = XCoreInfo
parent = XCore
required_libraries = MC Support Target
add_to_library_groups = XCore
-
diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h
index b8fb0ca..08f091e 100644
--- a/lib/Target/XCore/XCore.h
+++ b/lib/Target/XCore/XCore.h
@@ -24,7 +24,8 @@ namespace llvm {
class XCoreTargetMachine;
class formatted_raw_ostream;
- FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM);
+ FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
} // end namespace llvm;
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index 7f8b169..5007d04 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -84,7 +84,8 @@ XCoreFrameLowering::XCoreFrameLowering(const XCoreSubtarget &sti)
}
bool XCoreFrameLowering::hasFP(const MachineFunction &MF) const {
- return DisableFramePointerElim(MF) || MF.getFrameInfo()->hasVarSizedObjects();
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MF.getFrameInfo()->hasVarSizedObjects();
}
void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 8d746ae..7564fba 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -41,8 +41,8 @@ namespace {
const XCoreSubtarget &Subtarget;
public:
- XCoreDAGToDAGISel(XCoreTargetMachine &TM)
- : SelectionDAGISel(TM),
+ XCoreDAGToDAGISel(XCoreTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel),
Lowering(*TM.getTargetLowering()),
Subtarget(*TM.getSubtargetImpl()) { }
@@ -83,8 +83,9 @@ namespace {
/// createXCoreISelDag - This pass converts a legalized DAG into a
/// XCore-specific DAG, ready for instruction scheduling.
///
-FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM) {
- return new XCoreDAGToDAGISel(TM);
+FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new XCoreDAGToDAGISel(TM, OptLevel);
}
bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base,
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index d791daa..c5c668e 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -109,6 +109,8 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::ROTL , MVT::i32, Expand);
setOperationAction(ISD::ROTR , MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index eec3674..7e1e035 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -21,9 +21,10 @@ using namespace llvm;
///
XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS),
DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-"
"i16:16:32-i32:32:32-i64:32:32-n32"),
@@ -34,7 +35,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
}
bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM) {
- PM.add(createXCoreISelDag(*this));
+ PM.add(createXCoreISelDag(*this, getOptLevel()));
return false;
}
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 3f2644d..0159b1e 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -33,7 +33,7 @@ class XCoreTargetMachine : public LLVMTargetMachine {
XCoreSelectionDAGInfo TSInfo;
public:
XCoreTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 8fa66fc..58b3551 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -20,14 +20,3 @@ add_llvm_library(LLVMipo
StripDeadPrototypes.cpp
StripSymbols.cpp
)
-
-add_llvm_library_dependencies(LLVMipo
- LLVMAnalysis
- LLVMCore
- LLVMInstCombine
- LLVMScalarOpts
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- LLVMipa
- )
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index c57e9fc..2e869e6 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -26,6 +26,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -61,6 +62,7 @@ namespace {
struct GlobalStatus;
struct GlobalOpt : public ModulePass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfo>();
}
static char ID; // Pass identification, replacement for typeid
GlobalOpt() : ModulePass(ID) {
@@ -84,7 +86,10 @@ namespace {
}
char GlobalOpt::ID = 0;
-INITIALIZE_PASS(GlobalOpt, "globalopt",
+INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
+ "Global Variable Optimizer", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(GlobalOpt, "globalopt",
"Global Variable Optimizer", false, false)
ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
@@ -345,6 +350,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
// and will invalidate our notion of what Init is.
Constant *SubInit = 0;
if (!isa<ConstantExpr>(GEP->getOperand(0))) {
+ // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding.
ConstantExpr *CE =
dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP));
if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
@@ -828,6 +834,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
static void ConstantPropUsersOf(Value *V) {
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; )
if (Instruction *I = dyn_cast<Instruction>(*UI++))
+ // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding.
if (Constant *NewC = ConstantFoldInstruction(I)) {
I->replaceAllUsesWith(NewC);
@@ -1931,7 +1938,8 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
if (GV->hasInitializer())
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) {
TargetData *TD = getAnalysisIfAvailable<TargetData>();
- Constant *New = ConstantFoldConstantExpression(CE, TD);
+ TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ Constant *New = ConstantFoldConstantExpression(CE, TD, TLI);
if (New && New != CE)
GV->setInitializer(New);
}
@@ -2304,7 +2312,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
DenseMap<Constant*, Constant*> &MutatedMemory,
std::vector<GlobalVariable*> &AllocaTmps,
SmallPtrSet<Constant*, 8> &SimpleConstants,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// Check to see if this function is already executing (recursion). If so,
// bail out. TODO: we might want to accept limited recursion.
if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
@@ -2461,7 +2470,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
if (Callee->isDeclaration()) {
// If this is a function we can constant fold, do it.
- if (Constant *C = ConstantFoldCall(Callee, Formals)) {
+ if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) {
InstResult = C;
} else {
return false;
@@ -2473,7 +2482,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
Constant *RetVal;
// Execute the call, if successful, use the return value.
if (!EvaluateFunction(Callee, RetVal, Formals, CallStack,
- MutatedMemory, AllocaTmps, SimpleConstants, TD))
+ MutatedMemory, AllocaTmps, SimpleConstants, TD,
+ TLI))
return false;
InstResult = RetVal;
}
@@ -2535,7 +2545,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
if (!CurInst->use_empty()) {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
- InstResult = ConstantFoldConstantExpression(CE, TD);
+ InstResult = ConstantFoldConstantExpression(CE, TD, TLI);
Values[CurInst] = InstResult;
}
@@ -2547,7 +2557,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
/// EvaluateStaticConstructor - Evaluate static constructors in the function, if
/// we can. Return true if we can, false otherwise.
-static bool EvaluateStaticConstructor(Function *F, const TargetData *TD) {
+static bool EvaluateStaticConstructor(Function *F, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
/// MutatedMemory - For each store we execute, we update this map. Loads
/// check this to get the most up-to-date value. If evaluation is successful,
/// this state is committed to the process.
@@ -2572,7 +2583,7 @@ static bool EvaluateStaticConstructor(Function *F, const TargetData *TD) {
bool EvalSuccess = EvaluateFunction(F, RetValDummy,
SmallVector<Constant*, 0>(), CallStack,
MutatedMemory, AllocaTmps,
- SimpleConstants, TD);
+ SimpleConstants, TD, TLI);
if (EvalSuccess) {
// We succeeded at evaluation: commit the result.
@@ -2601,8 +2612,6 @@ static bool EvaluateStaticConstructor(Function *F, const TargetData *TD) {
return EvalSuccess;
}
-
-
/// OptimizeGlobalCtorsList - Simplify and evaluation global ctors if possible.
/// Return true if anything changed.
bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
@@ -2611,6 +2620,8 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
if (Ctors.empty()) return false;
const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+
// Loop over global ctors, optimizing them when we can.
for (unsigned i = 0; i != Ctors.size(); ++i) {
Function *F = Ctors[i];
@@ -2628,7 +2639,7 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
if (F->empty()) continue;
// If we can evaluate the ctor at compile time, do.
- if (EvaluateStaticConstructor(F, TD)) {
+ if (EvaluateStaticConstructor(F, TD, TLI)) {
Ctors.erase(Ctors.begin()+i);
MadeChange = true;
--i;
diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt
index 884faca..b358fab 100644
--- a/lib/Transforms/IPO/LLVMBuild.txt
+++ b/lib/Transforms/IPO/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = IPO
parent = Transforms
library_name = ipo
required_libraries = Analysis Core IPA InstCombine Scalar Support Target TransformUtils
-
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 8fdfd72..f63f532 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -21,7 +21,6 @@
#include "llvm/DefaultPasses.h"
#include "llvm/PassManager.h"
#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/Verifier.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/IPO.h"
@@ -101,6 +100,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(Inliner);
Inliner = 0;
}
+ addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
return;
}
@@ -340,4 +340,3 @@ void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
PassManagerBase *LPM = unwrap(PM);
Builder->populateLTOPassManager(*LPM, Internalize, RunInliner);
}
-
diff --git a/lib/Transforms/InstCombine/CMakeLists.txt b/lib/Transforms/InstCombine/CMakeLists.txt
index a46d5ad..d070ccc 100644
--- a/lib/Transforms/InstCombine/CMakeLists.txt
+++ b/lib/Transforms/InstCombine/CMakeLists.txt
@@ -13,11 +13,3 @@ add_llvm_library(LLVMInstCombine
InstCombineSimplifyDemanded.cpp
InstCombineVectorOps.cpp
)
-
-add_llvm_library_dependencies(LLVMInstCombine
- LLVMAnalysis
- LLVMCore
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- )
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 3808278..464e9d0 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -22,6 +22,7 @@
namespace llvm {
class CallSite;
class TargetData;
+ class TargetLibraryInfo;
class DbgDeclareInst;
class MemIntrinsic;
class MemSetInst;
@@ -71,6 +72,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
: public FunctionPass,
public InstVisitor<InstCombiner, Instruction*> {
TargetData *TD;
+ TargetLibraryInfo *TLI;
bool MadeIRChange;
public:
/// Worklist - All of the instructions that need to be simplified.
@@ -92,9 +94,11 @@ public:
bool DoOneIteration(Function &F, unsigned ItNum);
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-
+
TargetData *getTargetData() const { return TD; }
+ TargetLibraryInfo *getTargetLibraryInfo() const { return TLI; }
+
// Visitation implementation - Implement instruction combining for different
// instruction types. The semantics are as follows:
// Return Value:
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e8136ab..27c7c54 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -265,6 +265,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Get the current byte offset into the thing. Use the original
// operand in case we're looking through a bitcast.
SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end());
+ if (!GEP->getPointerOperandType()->isPointerTy())
+ return 0;
Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops);
Op1 = GEP->getPointerOperand()->stripPointerCasts();
@@ -960,7 +962,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
PointerType *PTy = cast<PointerType>(Callee->getType());
FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
if (FTy->isVarArg()) {
- int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 2 : 0);
+ int ix = FTy->getNumParams();
// See if we can optimize any arguments passed through the varargs area of
// the call.
for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index f10e48a..46e4acd 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -14,6 +14,7 @@
#include "InstCombine.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
@@ -147,8 +148,6 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
return ReplaceInstUsesWith(CI, New);
}
-
-
/// EvaluateInDifferentType - Given an expression that
/// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually
/// insert the code to evaluate the expression.
@@ -158,7 +157,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
// If we got a constantexpr back, try to simplify it with TD info.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- C = ConstantFoldConstantExpression(CE, TD);
+ C = ConstantFoldConstantExpression(CE, TD, TLI);
return C;
}
@@ -528,9 +527,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
return ReplaceInstUsesWith(CI, In);
}
-
-
-
+
// zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
// zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
// zext (X == 1) to i32 --> X iff X has only the low bit set.
@@ -1213,10 +1210,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
}
// Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
- // NOTE: This should be disabled by -fno-builtin-sqrt if we ever support it.
CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
- if (Call && Call->getCalledFunction() &&
- Call->getCalledFunction()->getName() == "sqrt" &&
+ if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) &&
+ Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) &&
Call->getNumArgOperands() == 1 &&
Call->hasOneUse()) {
CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0));
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index bb1cbfa..144b92b 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -284,7 +284,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// Find out if the comparison would be true or false for the i'th element.
Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
- CompareRHS, TD);
+ CompareRHS, TD, TLI);
// If the result is undef for this element, ignore it.
if (isa<UndefValue>(C)) {
// Extend range state machines to cover this element in case there is an
@@ -1657,6 +1657,14 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
return 0;
+ // This is only really a signed overflow check if the inputs have been
+ // sign-extended; check for that condition. For example, if CI2 is 2^31 and
+ // the operands of the add are 64 bits wide, we need at least 33 sign bits.
+ unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1;
+ if (IC.ComputeNumSignBits(A) < NeededSignBits ||
+ IC.ComputeNumSignBits(B) < NeededSignBits)
+ return 0;
+
// In order to replace the original add with a narrower
// llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
// and truncates that discard the high bits of the add. Verify that this is
@@ -1787,6 +1795,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD))
return ReplaceInstUsesWith(I, V);
+ // comparing -val or val with non-zero is the same as just comparing val
+ // ie, abs(val) != 0 -> val != 0
+ if (I.getPredicate() == ICmpInst::ICMP_NE && match(Op1, m_Zero()))
+ {
+ Value *Cond, *SelectTrue, *SelectFalse;
+ if (match(Op0, m_Select(m_Value(Cond), m_Value(SelectTrue),
+ m_Value(SelectFalse)))) {
+ if (Value *V = dyn_castNegVal(SelectTrue)) {
+ if (V == SelectFalse)
+ return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
+ }
+ else if (Value *V = dyn_castNegVal(SelectFalse)) {
+ if (V == SelectTrue)
+ return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
+ }
+ }
+ }
+
Type *Ty = Op0->getType();
// icmp's with boolean values can always be turned into bitwise operations
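[Editor's note] The sign-bit guard added above (NeededSignBits = CI1 width - NewWidth + 1) is easiest to see with concrete numbers. The following is a standalone sketch, not the InstCombine code, assuming 64-bit operands and a 32-bit overflow check; it shows the wide and narrowed checks agreeing for sign-extended inputs and disagreeing when an operand has only 32 sign bits:

    // Why narrowing to llvm.sadd.with.overflow.i32 needs >= 33 sign bits.
    #include <cstdint>
    #include <cstdio>

    // Wide form of the pattern: (a + b + 2^31) u> 2^32 - 1.
    static bool wideCheck(int64_t a, int64_t b) {
      return (uint64_t)(a + b + 0x80000000LL) > 0xFFFFFFFFULL;
    }

    // Narrowed form: does the 32-bit signed add of the truncated operands overflow?
    // (The int32_t casts assume the usual two's-complement narrowing.)
    static bool narrowCheck(int64_t a, int64_t b) {
      int32_t ta = (int32_t)a, tb = (int32_t)b;
      int64_t sum = (int64_t)ta + (int64_t)tb;
      return sum < INT32_MIN || sum > INT32_MAX;
    }

    int main() {
      // Sign-extended 32-bit inputs (>= 33 sign bits): both checks agree.
      std::printf("sext inputs:    wide=%d narrow=%d\n",
                  wideCheck(INT32_MAX, 1), narrowCheck(INT32_MAX, 1));
      // 0x80000000 has only 32 sign bits as an i64: the checks disagree,
      // which is exactly the case the new ComputeNumSignBits guard rejects.
      std::printf("non-sext input: wide=%d narrow=%d\n",
                  wideCheck(0x80000000LL, 0), narrowCheck(0x80000000LL, 0));
      return 0;
    }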
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 91e60a4..f1ea8ea 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -282,7 +282,8 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is
/// replaced with RepOp.
static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// Trivial replacement.
if (V == Op)
return RepOp;
@@ -294,17 +295,19 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
// If this is a binary operator, try to simplify it with the replaced op.
if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) {
if (B->getOperand(0) == Op)
- return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD);
+ return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD, TLI);
if (B->getOperand(1) == Op)
- return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD);
+ return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD, TLI);
}
// Same for CmpInsts.
if (CmpInst *C = dyn_cast<CmpInst>(I)) {
if (C->getOperand(0) == Op)
- return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD);
+ return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD,
+ TLI);
if (C->getOperand(1) == Op)
- return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD);
+ return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD,
+ TLI);
}
// TODO: We could hand off more cases to instsimplify here.
@@ -330,7 +333,7 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
return ConstantFoldLoadFromConstPtr(ConstOps[0], TD);
return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- ConstOps, TD);
+ ConstOps, TD, TLI);
}
}
@@ -479,18 +482,18 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
// arms of the select. See if substituting this value into the arm and
// simplifying the result yields the same value as the other arm.
if (Pred == ICmpInst::ICMP_EQ) {
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal)
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD, TLI) == TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD, TLI) == TrueVal)
return ReplaceInstUsesWith(SI, FalseVal);
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal)
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD, TLI) == FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD, TLI) == FalseVal)
return ReplaceInstUsesWith(SI, FalseVal);
} else if (Pred == ICmpInst::ICMP_NE) {
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal)
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD, TLI) == FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD, TLI) == FalseVal)
return ReplaceInstUsesWith(SI, TrueVal);
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal)
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD, TLI) == TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD, TLI) == TrueVal)
return ReplaceInstUsesWith(SI, TrueVal);
}
@@ -679,6 +682,13 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return BinaryOperator::CreateOr(CondVal, FalseVal);
else if (CondVal == FalseVal)
return BinaryOperator::CreateAnd(CondVal, TrueVal);
+
+ // select a, ~a, b -> (~a)&b
+ // select a, b, ~a -> (~a)|b
+ if (match(TrueVal, m_Not(m_Specific(CondVal))))
+ return BinaryOperator::CreateAnd(TrueVal, FalseVal);
+ else if (match(FalseVal, m_Not(m_Specific(CondVal))))
+ return BinaryOperator::CreateOr(TrueVal, FalseVal);
}
// Selecting between two integer constants?
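[Editor's note] The two new folds above apply to 1-bit select operands. A quick standalone truth-table check (plain C++, not LLVM code) confirms both identities:

    // select a, ~a, b == ~a & b   and   select a, b, ~a == ~a | b, for i1 values.
    #include <cassert>
    #include <cstdio>

    int main() {
      for (int a = 0; a <= 1; ++a)
        for (int b = 0; b <= 1; ++b) {
          int na = a ^ 1;                      // ~a for a 1-bit value
          assert((a ? na : b) == (na & b));    // select a, ~a, b -> (~a) & b
          assert((a ? b : na) == (na | b));    // select a, b, ~a -> (~a) | b
        }
      std::puts("both identities hold for all i1 inputs");
      return 0;
    }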
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 6d85add..702e0f2 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -190,7 +190,8 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
V = IC.Builder->CreateLShr(C, NumBits);
// If we got a constantexpr back, try to simplify it with TD info.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- V = ConstantFoldConstantExpression(CE, IC.getTargetData());
+ V = ConstantFoldConstantExpression(CE, IC.getTargetData(),
+ IC.getTargetLibraryInfo());
return V;
}
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index a7a6311..af065cd 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -41,6 +41,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
@@ -74,11 +75,15 @@ void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
}
char InstCombiner::ID = 0;
-INITIALIZE_PASS(InstCombiner, "instcombine",
+INITIALIZE_PASS_BEGIN(InstCombiner, "instcombine",
+ "Combine redundant instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(InstCombiner, "instcombine",
"Combine redundant instructions", false, false)
void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfo>();
}
@@ -826,7 +831,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
MadeChange = true;
}
- if ((*I)->getType() != IntPtrTy) {
+ Type *IndexTy = (*I)->getType();
+ if (IndexTy != IntPtrTy && !IndexTy->isVectorTy()) {
// If we are using a wider index than needed for this platform, shrink
// it to what we need. If narrower, sign-extend it to what we need.
// This explicit cast can make subsequent optimizations more obvious.
@@ -909,7 +915,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
Value *StrippedPtr = PtrOp->stripPointerCasts();
- PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType());
+ PointerType *StrippedPtrTy = dyn_cast<PointerType>(StrippedPtr->getType());
+ // We do not handle pointer-vector geps here
+ if (!StrippedPtr)
+ return 0;
+
if (StrippedPtr != PtrOp &&
StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
@@ -1798,7 +1808,8 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
static bool AddReachableCodeToWorklist(BasicBlock *BB,
SmallPtrSet<BasicBlock*, 64> &Visited,
InstCombiner &IC,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
bool MadeIRChange = false;
SmallVector<BasicBlock*, 256> Worklist;
Worklist.push_back(BB);
@@ -1825,7 +1836,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
// ConstantProp instruction if trivially constant.
if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
- if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
+ if (Constant *C = ConstantFoldInstruction(Inst, TD, TLI)) {
DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
<< *Inst << '\n');
Inst->replaceAllUsesWith(C);
@@ -1843,7 +1854,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
Constant*& FoldRes = FoldedConstants[CE];
if (!FoldRes)
- FoldRes = ConstantFoldConstantExpression(CE, TD);
+ FoldRes = ConstantFoldConstantExpression(CE, TD, TLI);
if (!FoldRes)
FoldRes = CE;
@@ -1909,7 +1920,8 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// the reachable instructions. Ignore blocks that are not reachable. Keep
// track of which blocks we visit.
SmallPtrSet<BasicBlock*, 64> Visited;
- MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD);
+ MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD,
+ TLI);
// Do a quick scan over the function. If we find any blocks that are
// unreachable, remove any instructions inside of them. This prevents
@@ -1954,7 +1966,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// Instruction isn't dead, see if we can constant propagate it.
if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
- if (Constant *C = ConstantFoldInstruction(I, TD)) {
+ if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) {
DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
// Add operands to the worklist.
@@ -2062,7 +2074,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
bool InstCombiner::runOnFunction(Function &F) {
TD = getAnalysisIfAvailable<TargetData>();
-
+ TLI = &getAnalysis<TargetLibraryInfo>();
/// Builder - This is an IRBuilder that automatically inserts new
/// instructions into the worklist when they are created.
diff --git a/lib/Transforms/InstCombine/LLVMBuild.txt b/lib/Transforms/InstCombine/LLVMBuild.txt
index b73c303..62c6161 100644
--- a/lib/Transforms/InstCombine/LLVMBuild.txt
+++ b/lib/Transforms/InstCombine/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = InstCombine
parent = Transforms
required_libraries = Analysis Core Support Target TransformUtils
-
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index b617539..4cc5727 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -55,8 +55,11 @@ static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3;
static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E;
static const char *kAsanModuleCtorName = "asan.module_ctor";
+static const char *kAsanModuleDtorName = "asan.module_dtor";
+static const int kAsanCtorAndCtorPriority = 1;
static const char *kAsanReportErrorTemplate = "__asan_report_";
static const char *kAsanRegisterGlobalsName = "__asan_register_globals";
+static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals";
static const char *kAsanInitName = "__asan_init";
static const char *kAsanMappingOffsetName = "__asan_mapping_offset";
static const char *kAsanMappingScaleName = "__asan_mapping_scale";
@@ -434,6 +437,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
IRBuilder<> IRB1(CheckTerm);
Instruction *Crash = generateCrashCode(IRB1, AddrLong, IsWrite, TypeSize);
Crash->setDebugLoc(OrigIns->getDebugLoc());
+ ReplaceInstWithInst(CheckTerm, new UnreachableInst(*C));
}
// This function replaces all global variables with new variables that have
@@ -517,7 +521,11 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
NewTy, G->getInitializer(),
Constant::getNullValue(RightRedZoneTy), NULL);
- GlobalVariable *Name = createPrivateGlobalForString(M, G->getName());
+ SmallString<2048> DescriptionOfGlobal = G->getName();
+ DescriptionOfGlobal += " (";
+ DescriptionOfGlobal += M.getModuleIdentifier();
+ DescriptionOfGlobal += ")";
+ GlobalVariable *Name = createPrivateGlobalForString(M, DescriptionOfGlobal);
// Create a new global variable with enough space for a redzone.
GlobalVariable *NewGlobal = new GlobalVariable(
@@ -558,6 +566,22 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
IRB.CreatePointerCast(AllGlobals, IntptrTy),
ConstantInt::get(IntptrTy, n));
+ // We also need to unregister globals at the end, e.g. when a shared library
+ // gets closed.
+ Function *AsanDtorFunction = Function::Create(
+ FunctionType::get(Type::getVoidTy(*C), false),
+ GlobalValue::InternalLinkage, kAsanModuleDtorName, &M);
+ BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction);
+ IRBuilder<> IRB_Dtor(ReturnInst::Create(*C, AsanDtorBB));
+ Function *AsanUnregisterGlobals = cast<Function>(M.getOrInsertFunction(
+ kAsanUnregisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage);
+
+ IRB_Dtor.CreateCall2(AsanUnregisterGlobals,
+ IRB.CreatePointerCast(AllGlobals, IntptrTy),
+ ConstantInt::get(IntptrTy, n));
+ appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndCtorPriority);
+
DEBUG(dbgs() << M);
return true;
}
@@ -631,7 +655,7 @@ bool AddressSanitizer::runOnModule(Module &M) {
Res |= handleFunction(M, *F);
}
- appendToGlobalCtors(M, AsanCtorFunction, 1 /*high priority*/);
+ appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndCtorPriority);
return Res;
}
@@ -951,8 +975,8 @@ BlackList::BlackList(const std::string &Path) {
OwningPtr<MemoryBuffer> File;
if (error_code EC = MemoryBuffer::getFile(ClBlackListFile.c_str(), File)) {
- errs() << EC.message();
- exit(1);
+ report_fatal_error("Can't open blacklist file " + ClBlackListFile + ": " +
+ EC.message());
}
MemoryBuffer *Buff = File.take();
const char *Data = Buff->getBufferStart();
@@ -962,15 +986,23 @@ BlackList::BlackList(const std::string &Path) {
for (size_t i = 0, numLines = Lines.size(); i < numLines; i++) {
if (Lines[i].startswith(kFunPrefix)) {
std::string ThisFunc = Lines[i].substr(strlen(kFunPrefix));
- if (Fun.size()) {
- Fun += "|";
- }
+ std::string ThisFuncRE;
// add ThisFunc replacing * with .*
for (size_t j = 0, n = ThisFunc.size(); j < n; j++) {
if (ThisFunc[j] == '*')
- Fun += '.';
- Fun += ThisFunc[j];
+ ThisFuncRE += '.';
+ ThisFuncRE += ThisFunc[j];
}
+ // Check that the regexp is valid.
+ Regex CheckRE(ThisFuncRE);
+ std::string Error;
+ if (!CheckRE.isValid(Error))
+ report_fatal_error("malformed blacklist regex: " + ThisFunc +
+ ": " + Error);
+ // Append to the final regexp.
+ if (Fun.size())
+ Fun += "|";
+ Fun += ThisFuncRE;
}
}
if (Fun.size()) {
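[Editor's note] The blacklist change above translates each "fun:" entry separately ('*' becomes ".*"), validates it on its own so a malformed entry can be reported precisely, and only then joins the entries with "|". A rough standalone analogue, using std::regex instead of llvm::Regex and hypothetical entries, looks like this:

    #include <cstdio>
    #include <regex>
    #include <string>
    #include <vector>

    int main() {
      std::vector<std::string> entries = { "my_*_helper", "exact_name" };
      std::string joined;
      for (const std::string &entry : entries) {
        std::string re;
        for (char c : entry) {
          if (c == '*')
            re += '.';          // '*' becomes ".*"; other characters copied as-is
          re += c;
        }
        try {
          std::regex check(re);                // validate this entry alone
        } catch (const std::regex_error &e) {
          std::fprintf(stderr, "malformed blacklist regex: %s: %s\n",
                       entry.c_str(), e.what());
          return 1;
        }
        if (!joined.empty())
          joined += "|";                       // append to the final pattern
        joined += re;
      }
      std::printf("final pattern: %s\n", joined.c_str());
      return 0;
    }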
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index 929b7cd..a4a1fef 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -7,10 +7,3 @@ add_llvm_library(LLVMInstrumentation
PathProfiling.cpp
ProfilingUtils.cpp
)
-
-add_llvm_library_dependencies(LLVMInstrumentation
- LLVMAnalysis
- LLVMCore
- LLVMSupport
- LLVMTransformUtils
- )
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index ccf7e11..96e5d5b 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -43,12 +43,14 @@ namespace {
public:
static char ID;
GCOVProfiler()
- : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false) {
+ : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false),
+ UseExtraChecksum(false) {
initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
}
- GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format = false)
+ GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format = false,
+ bool useExtraChecksum = false)
: ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData),
- Use402Format(use402Format) {
+ Use402Format(use402Format), UseExtraChecksum(useExtraChecksum) {
assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?");
initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
}
@@ -94,6 +96,7 @@ namespace {
bool EmitNotes;
bool EmitData;
bool Use402Format;
+ bool UseExtraChecksum;
Module *M;
LLVMContext *Ctx;
@@ -105,8 +108,9 @@ INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling",
"Insert instrumentation for GCOV profiling", false, false)
ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData,
- bool Use402Format) {
- return new GCOVProfiler(EmitNotes, EmitData, Use402Format);
+ bool Use402Format,
+ bool UseExtraChecksum) {
+ return new GCOVProfiler(EmitNotes, EmitData, Use402Format, UseExtraChecksum);
}
namespace {
@@ -167,7 +171,7 @@ namespace {
}
uint32_t length() {
- // Here 2 = 1 for string lenght + 1 for '0' id#.
+ // Here 2 = 1 for string length + 1 for '0' id#.
return lengthOfGCOVString(Filename) + 2 + Lines.size();
}
@@ -244,10 +248,12 @@ namespace {
// object users can construct, the blocks and lines will be rooted here.
class GCOVFunction : public GCOVRecord {
public:
- GCOVFunction(DISubprogram SP, raw_ostream *os, bool Use402Format) {
+ GCOVFunction(DISubprogram SP, raw_ostream *os,
+ bool Use402Format, bool UseExtraChecksum) {
this->os = os;
Function *F = SP.getFunction();
+ DEBUG(dbgs() << "Function: " << F->getName() << "\n");
uint32_t i = 0;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
Blocks[BB] = new GCOVBlock(i++, os);
@@ -257,14 +263,14 @@ namespace {
writeBytes(FunctionTag, 4);
uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) +
1 + lengthOfGCOVString(SP.getFilename()) + 1;
- if (!Use402Format)
- ++BlockLen; // For second checksum.
+ if (UseExtraChecksum)
+ ++BlockLen;
write(BlockLen);
uint32_t Ident = reinterpret_cast<intptr_t>((MDNode*)SP);
write(Ident);
- write(0); // checksum #1
- if (!Use402Format)
- write(0); // checksum #2
+ write(0); // lineno checksum
+ if (UseExtraChecksum)
+ write(0); // cfg checksum
writeGCOVString(SP.getName());
writeGCOVString(SP.getFilename());
write(SP.getLineNumber());
@@ -290,6 +296,7 @@ namespace {
for (int i = 0, e = Blocks.size() + 1; i != e; ++i) {
write(0); // No flags on our blocks.
}
+ DEBUG(dbgs() << Blocks.size() << " blocks.\n");
// Emit edges between blocks.
for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = Blocks.begin(),
@@ -301,6 +308,8 @@ namespace {
write(Block.OutEdges.size() * 2 + 1);
write(Block.Number);
for (int i = 0, e = Block.OutEdges.size(); i != e; ++i) {
+ DEBUG(dbgs() << Block.Number << " -> " << Block.OutEdges[i]->Number
+ << "\n");
write(Block.OutEdges[i]->Number);
write(0); // no flags
}
@@ -350,68 +359,60 @@ bool GCOVProfiler::runOnModule(Module &M) {
}
void GCOVProfiler::emitGCNO() {
- DenseMap<const MDNode *, raw_fd_ostream *> GcnoFiles;
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
- if (CU_Nodes) {
- for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
- // Each compile unit gets its own .gcno file. This means that whether we run
- // this pass over the original .o's as they're produced, or run it after
- // LTO, we'll generate the same .gcno files.
-
- DICompileUnit CU(CU_Nodes->getOperand(i));
- raw_fd_ostream *&out = GcnoFiles[CU];
- std::string ErrorInfo;
- out = new raw_fd_ostream(mangleName(CU, "gcno").c_str(), ErrorInfo,
- raw_fd_ostream::F_Binary);
- if (!Use402Format)
- out->write("oncg*404MVLL", 12);
- else
- out->write("oncg*204MVLL", 12);
-
- DIArray SPs = CU.getSubprograms();
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
- DISubprogram SP(SPs.getElement(i));
- if (!SP.Verify()) continue;
- raw_fd_ostream *&os = GcnoFiles[CU];
-
- Function *F = SP.getFunction();
- if (!F) continue;
- GCOVFunction Func(SP, os, Use402Format);
-
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- GCOVBlock &Block = Func.getBlock(BB);
- TerminatorInst *TI = BB->getTerminator();
- if (int successors = TI->getNumSuccessors()) {
- for (int i = 0; i != successors; ++i) {
- Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
- }
- } else if (isa<ReturnInst>(TI)) {
- Block.addEdge(Func.getReturnBlock());
- }
-
- uint32_t Line = 0;
- for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) {
- const DebugLoc &Loc = I->getDebugLoc();
- if (Loc.isUnknown()) continue;
- if (Line == Loc.getLine()) continue;
- Line = Loc.getLine();
- if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue;
-
- GCOVLines &Lines = Block.getFile(SP.getFilename());
- Lines.addLine(Loc.getLine());
+ if (!CU_Nodes) return;
+
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ // Each compile unit gets its own .gcno file. This means that whether we run
+ // this pass over the original .o's as they're produced, or run it after
+ // LTO, we'll generate the same .gcno files.
+
+ DICompileUnit CU(CU_Nodes->getOperand(i));
+ std::string ErrorInfo;
+ raw_fd_ostream out(mangleName(CU, "gcno").c_str(), ErrorInfo,
+ raw_fd_ostream::F_Binary);
+ if (!Use402Format)
+ out.write("oncg*404MVLL", 12);
+ else
+ out.write("oncg*204MVLL", 12);
+
+ DIArray SPs = CU.getSubprograms();
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
+ DISubprogram SP(SPs.getElement(i));
+ if (!SP.Verify()) continue;
+
+ Function *F = SP.getFunction();
+ if (!F) continue;
+ GCOVFunction Func(SP, &out, Use402Format, UseExtraChecksum);
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ GCOVBlock &Block = Func.getBlock(BB);
+ TerminatorInst *TI = BB->getTerminator();
+ if (int successors = TI->getNumSuccessors()) {
+ for (int i = 0; i != successors; ++i) {
+ Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
}
+ } else if (isa<ReturnInst>(TI)) {
+ Block.addEdge(Func.getReturnBlock());
+ }
+
+ uint32_t Line = 0;
+ for (BasicBlock::iterator I = BB->begin(), IE = BB->end();
+ I != IE; ++I) {
+ const DebugLoc &Loc = I->getDebugLoc();
+ if (Loc.isUnknown()) continue;
+ if (Line == Loc.getLine()) continue;
+ Line = Loc.getLine();
+ if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue;
+
+ GCOVLines &Lines = Block.getFile(SP.getFilename());
+ Lines.addLine(Loc.getLine());
}
- Func.writeOut();
}
+ Func.writeOut();
}
- }
-
- for (DenseMap<const MDNode *, raw_fd_ostream *>::iterator
- I = GcnoFiles.begin(), E = GcnoFiles.end(); I != E; ++I) {
- raw_fd_ostream *&out = I->second;
- out->write("\0\0\0\0\0\0\0\0", 8); // EOF
- out->close();
- delete out;
+ out.write("\0\0\0\0\0\0\0\0", 8); // EOF
+ out.close();
}
}
diff --git a/lib/Transforms/Instrumentation/LLVMBuild.txt b/lib/Transforms/Instrumentation/LLVMBuild.txt
index f302d03..d36ad54 100644
--- a/lib/Transforms/Instrumentation/LLVMBuild.txt
+++ b/lib/Transforms/Instrumentation/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = Instrumentation
parent = Transforms
required_libraries = Analysis Core Support TransformUtils
-
diff --git a/lib/Transforms/LLVMBuild.txt b/lib/Transforms/LLVMBuild.txt
index d36b898..b2ef49a 100644
--- a/lib/Transforms/LLVMBuild.txt
+++ b/lib/Transforms/LLVMBuild.txt
@@ -15,8 +15,10 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = IPO InstCombine Instrumentation Scalar Utils
+
[component_0]
type = Group
name = Transforms
parent = Libraries
-
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index a6f0cf3..d660c72 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -32,12 +32,3 @@ add_llvm_library(LLVMScalarOpts
Sink.cpp
TailRecursionElimination.cpp
)
-
-add_llvm_library_dependencies(LLVMScalarOpts
- LLVMAnalysis
- LLVMCore
- LLVMInstCombine
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- )
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index f8f18b2..f9abfe9 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -26,6 +26,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -69,6 +70,7 @@ namespace {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// transformation profitability.
const TargetLowering *TLI;
+ const TargetLibraryInfo *TLInfo;
DominatorTree *DT;
ProfileInfo *PFI;
@@ -97,6 +99,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
AU.addPreserved<ProfileInfo>();
+ AU.addRequired<TargetLibraryInfo>();
}
private:
@@ -116,7 +119,10 @@ namespace {
}
char CodeGenPrepare::ID = 0;
-INITIALIZE_PASS(CodeGenPrepare, "codegenprepare",
+INITIALIZE_PASS_BEGIN(CodeGenPrepare, "codegenprepare",
+ "Optimize for code generation", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(CodeGenPrepare, "codegenprepare",
"Optimize for code generation", false, false)
FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
@@ -127,6 +133,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
bool EverMadeChange = false;
ModifiedDT = false;
+ TLInfo = &getAnalysis<TargetLibraryInfo>();
DT = getAnalysisIfAvailable<DominatorTree>();
PFI = getAnalysisIfAvailable<ProfileInfo>();
@@ -542,7 +549,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
WeakVH IterHandle(CurInstIterator);
ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0,
- ModifiedDT ? 0 : DT);
+ TLInfo, ModifiedDT ? 0 : DT);
// If the iterator instruction was recursively deleted, start over at the
// start of the block.
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index 664c3f6..5430f62 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -24,6 +24,8 @@
#include "llvm/Constant.h"
#include "llvm/Instruction.h"
#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/ADT/Statistic.h"
#include <set>
@@ -42,19 +44,22 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfo>();
}
};
}
char ConstantPropagation::ID = 0;
-INITIALIZE_PASS(ConstantPropagation, "constprop",
+INITIALIZE_PASS_BEGIN(ConstantPropagation, "constprop",
+ "Simple constant propagation", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(ConstantPropagation, "constprop",
"Simple constant propagation", false, false)
FunctionPass *llvm::createConstantPropagationPass() {
return new ConstantPropagation();
}
-
bool ConstantPropagation::runOnFunction(Function &F) {
// Initialize the worklist to all of the instructions ready to process...
std::set<Instruction*> WorkList;
@@ -62,13 +67,15 @@ bool ConstantPropagation::runOnFunction(Function &F) {
WorkList.insert(&*i);
}
bool Changed = false;
+ TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
while (!WorkList.empty()) {
Instruction *I = *WorkList.begin();
WorkList.erase(WorkList.begin()); // Get an element from the worklist...
if (!I->use_empty()) // Don't muck with dead instructions...
- if (Constant *C = ConstantFoldInstruction(I)) {
+ if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) {
// Add all of the users of this instruction to the worklist, they might
// be constant propagatable now...
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index f5688cb..8729019 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -416,7 +416,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// writes to addresses which will definitely be overwritten later
if (LaterOff > EarlierOff &&
LaterOff < int64_t(EarlierOff + Earlier.Size) &&
- LaterOff + Later.Size >= EarlierOff + Earlier.Size)
+ int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size))
return OverwriteEnd;
// Otherwise, they don't completely overlap.
@@ -624,6 +624,7 @@ static void FindUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
BasicBlock *BB, DominatorTree *DT) {
for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
BasicBlock *Pred = *I;
+ if (Pred == BB) continue;
TerminatorInst *PredTI = Pred->getTerminator();
if (PredTI->getNumSuccessors() != 1)
continue;
@@ -853,4 +854,3 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
I != E; ++I)
DeadStackObjects.erase(*I);
}
-
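[Editor's note] The int64_t cast added to isOverwrite above fixes a mixed signed/unsigned comparison: offsets are int64_t while sizes are uint64_t, so the sum promotes to unsigned. A standalone illustration (not the DSE code itself, using hypothetical offsets that satisfy the surrounding LaterOff > EarlierOff guard):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Hypothetical stores: earlier covers [-16, 0), later covers [-8, -4).
      // The later store does NOT reach the end of the earlier one.
      int64_t EarlierOff = -16, LaterOff = -8;
      uint64_t EarlierSize = 16, LaterSize = 4;

      // Old form: LaterOff + LaterSize promotes to uint64_t, -8 + 4 wraps to a
      // huge unsigned value, and the comparison is wrongly true.
      bool oldForm = LaterOff + LaterSize >= EarlierOff + EarlierSize;
      // Patched form: convert back to int64_t so the comparison is signed
      // (relies on the usual two's-complement conversion, as the patch does).
      bool newForm =
          int64_t(LaterOff + LaterSize) >= int64_t(EarlierOff + EarlierSize);

      std::printf("old form: %d  patched form: %d (expected 0)\n",
                  oldForm, newForm);
      return 0;
    }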
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index c0223d2..5241e11 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
@@ -215,6 +216,7 @@ namespace {
class EarlyCSE : public FunctionPass {
public:
const TargetData *TD;
+ const TargetLibraryInfo *TLI;
DominatorTree *DT;
typedef RecyclingAllocator<BumpPtrAllocator,
ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy;
@@ -263,6 +265,7 @@ private:
// This transformation requires dominator postdominator info
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
+ AU.addRequired<TargetLibraryInfo>();
AU.setPreservesCFG();
}
};
@@ -277,6 +280,7 @@ FunctionPass *llvm::createEarlyCSEPass() {
INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false)
bool EarlyCSE::processNode(DomTreeNode *Node) {
@@ -328,7 +332,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// If the instruction can be simplified (e.g. X+0 = X) then replace it with
// its simpler value.
- if (Value *V = SimplifyInstruction(Inst, TD, DT)) {
+ if (Value *V = SimplifyInstruction(Inst, TD, TLI, DT)) {
DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n');
Inst->replaceAllUsesWith(V);
Inst->eraseFromParent();
@@ -455,6 +459,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
bool EarlyCSE::runOnFunction(Function &F) {
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
DT = &getAnalysis<DominatorTree>();
// Tables that the pass uses when walking the domtree.
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index a51cbb6..374fdd7 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -31,6 +31,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/ADT/DenseMap.h"
@@ -446,7 +447,8 @@ namespace {
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
const TargetData *TD;
-
+ const TargetLibraryInfo *TLI;
+
ValueTable VN;
/// LeaderTable - A mapping from value numbers to lists of Value*'s that
@@ -530,6 +532,7 @@ namespace {
// This transformation requires dominator postdominator info
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
+ AU.addRequired<TargetLibraryInfo>();
if (!NoLoads)
AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
@@ -568,6 +571,7 @@ FunctionPass *llvm::createGVNPass(bool NoLoads) {
INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
@@ -2032,7 +2036,7 @@ bool GVN::processInstruction(Instruction *I) {
// to value numbering it. Value numbering often exposes redundancies, for
// example if it determines that %y is equal to %x then the instruction
// "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
- if (Value *V = SimplifyInstruction(I, TD, DT)) {
+ if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) {
I->replaceAllUsesWith(V);
if (MD && V->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
@@ -2134,6 +2138,7 @@ bool GVN::runOnFunction(Function& F) {
MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTree>();
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
VN.setMemDep(MD);
VN.setDomTree(DT);
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp
index 0772b48..ad8689a 100644
--- a/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -182,7 +182,7 @@ bool GlobalMerge::doInitialization(Module &M) {
continue;
// Ignore fancy-aligned globals for now.
- unsigned Alignment = I->getAlignment();
+ unsigned Alignment = TD->getPreferredAlignment(I);
Type *Ty = I->getType()->getElementType();
if (Alignment > TD->getABITypeAlignment(Ty))
continue;
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 1f21108..6d52b22 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -58,18 +58,16 @@ STATISTIC(NumLFTR , "Number of loop exit tests replaced");
STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
-namespace llvm {
- cl::opt<bool> EnableIVRewrite(
- "enable-iv-rewrite", cl::Hidden,
- cl::desc("Enable canonical induction variable rewriting"));
-
- // Trip count verification can be enabled by default under NDEBUG if we
- // implement a strong expression equivalence checker in SCEV. Until then, we
- // use the verify-indvars flag, which may assert in some cases.
- cl::opt<bool> VerifyIndvars(
- "verify-indvars", cl::Hidden,
- cl::desc("Verify the ScalarEvolution result after running indvars"));
-}
+static cl::opt<bool> EnableIVRewrite(
+ "enable-iv-rewrite", cl::Hidden,
+ cl::desc("Enable canonical induction variable rewriting"));
+
+// Trip count verification can be enabled by default under NDEBUG if we
+// implement a strong expression equivalence checker in SCEV. Until then, we
+// use the verify-indvars flag, which may assert in some cases.
+static cl::opt<bool> VerifyIndvars(
+ "verify-indvars", cl::Hidden,
+ cl::desc("Verify the ScalarEvolution result after running indvars"));
namespace {
class IndVarSimplify : public LoopPass {
@@ -180,6 +178,11 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
// base of a recurrence. This handles the case in which SCEV expansion
// converts a pointer type recurrence into a nonrecurrent pointer base
// indexed by an integer recurrence.
+
+ // If the GEP base pointer is a vector of pointers, abort.
+ if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
+ return false;
+
const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
if (FromBase == ToBase)
@@ -946,9 +949,13 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
else
return 0;
+ // When creating this AddExpr, don't apply the current operations NSW or NUW
+ // flags. This instruction may be guarded by control flow that the no-wrap
+ // behavior depends on. Non-control-equivalent instructions can be mapped to
+ // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
+ // semantics to those operations.
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(
- SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr,
- IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW));
+ SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr));
if (!AddRec || AddRec->getLoop() != L)
return 0;
@@ -1231,7 +1238,11 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
/// BackedgeTakenInfo. If these expressions have not been reduced, then
/// expanding them may incur additional cost (albeit in the loop preheader).
static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
+ SmallPtrSet<const SCEV*, 8> &Processed,
ScalarEvolution *SE) {
+ if (!Processed.insert(S))
+ return false;
+
// If the backedge-taken count is a UDiv, it's very likely a UDiv that
// ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a
// precise expression, rather than a UDiv from the user's code. If we can't
@@ -1259,7 +1270,7 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I) {
- if (isHighCostExpansion(*I, BI, SE))
+ if (isHighCostExpansion(*I, BI, Processed, SE))
return true;
}
return false;
@@ -1302,7 +1313,8 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
if (!BI)
return false;
- if (isHighCostExpansion(BackedgeTakenCount, BI, SE))
+ SmallPtrSet<const SCEV*, 8> Processed;
+ if (isHighCostExpansion(BackedgeTakenCount, BI, Processed, SE))
return false;
return true;
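
The Processed set threaded through isHighCostExpansion above is a plain memoization guard: each SCEV node is examined at most once, so shared subexpressions of a large backedge-taken count cannot make the recursion blow up. A minimal standalone sketch of the same pattern over a toy expression DAG (plain C++, not the LLVM SCEV API):

#include <iostream>
#include <unordered_set>
#include <vector>

// Toy expression node standing in for a SCEV: it is either marked expensive
// to expand, or its cost is determined by its operands.
struct Expr {
  bool Expensive;
  std::vector<const Expr *> Ops;
};

// Returns true if expanding E is considered high cost. The Processed set
// ensures each node is visited once, even when it is reachable through
// several operand edges (a DAG, as with uniqued SCEV expressions).
static bool isHighCostExpansion(const Expr *E,
                                std::unordered_set<const Expr *> &Processed) {
  if (!Processed.insert(E).second)
    return false;                 // Already examined; don't rate it again.
  if (E->Expensive)
    return true;
  for (const Expr *Op : E->Ops)
    if (isHighCostExpansion(Op, Processed))
      return true;
  return false;
}

int main() {
  Expr Cheap{false, {}};
  Expr Shared{false, {}};               // reachable through two operand edges
  Expr Add1{false, {&Cheap, &Shared}};
  Expr Add2{false, {&Shared, &Add1}};
  std::unordered_set<const Expr *> Processed;
  std::cout << std::boolalpha << isHighCostExpansion(&Add2, Processed) << "\n";
  return 0;
}
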
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index f410af3..c78db3f 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -24,6 +24,7 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Statistic.h"
@@ -75,6 +76,7 @@ namespace {
///
class JumpThreading : public FunctionPass {
TargetData *TD;
+ TargetLibraryInfo *TLI;
LazyValueInfo *LVI;
#ifdef NDEBUG
SmallPtrSet<BasicBlock*, 16> LoopHeaders;
@@ -107,6 +109,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<LazyValueInfo>();
AU.addPreserved<LazyValueInfo>();
+ AU.addRequired<TargetLibraryInfo>();
}
void FindLoopHeaders(Function &F);
@@ -133,6 +136,7 @@ char JumpThreading::ID = 0;
INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
"Jump Threading", false, false)
INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(JumpThreading, "jump-threading",
"Jump Threading", false, false)
@@ -144,6 +148,7 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
bool JumpThreading::runOnFunction(Function &F) {
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
LVI = &getAnalysis<LazyValueInfo>();
FindLoopHeaders(F);
@@ -674,7 +679,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// Run constant folding to see if we can reduce the condition to a simple
// constant.
if (Instruction *I = dyn_cast<Instruction>(Condition)) {
- Value *SimpleVal = ConstantFoldInstruction(I, TD);
+ Value *SimpleVal = ConstantFoldInstruction(I, TD, TLI);
if (SimpleVal) {
I->replaceAllUsesWith(SimpleVal);
I->eraseFromParent();
@@ -921,8 +926,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Split them out to their own block.
UnavailablePred =
- SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(),
- "thread-pre-split", this);
+ SplitBlockPredecessors(LoadBB, PredsToSplit, "thread-pre-split", this);
}
// If the value isn't available in all predecessors, then there will be
@@ -1334,8 +1338,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
- ".thr_comm", this);
+ PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this);
}
// And finally, do it!
@@ -1479,8 +1482,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
- ".thr_comm", this);
+ PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this);
}
// Okay, we decided to do this! Clone all the instructions in BB onto the end
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 8098b36..8795cd8 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -43,8 +43,11 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
@@ -84,6 +87,7 @@ namespace {
AU.addPreserved<AliasAnalysis>();
AU.addPreserved("scalar-evolution");
AU.addPreservedID(LoopSimplifyID);
+ AU.addRequired<TargetLibraryInfo>();
}
bool doFinalization() {
@@ -96,6 +100,9 @@ namespace {
LoopInfo *LI; // Current LoopInfo
DominatorTree *DT; // Dominator Tree for the current Loop.
+ TargetData *TD; // TargetData for constant folding.
+ TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding.
+
// State that is updated as we process loops.
bool Changed; // Set to true when we change anything.
BasicBlock *Preheader; // The preheader block of the current loop...
@@ -177,6 +184,7 @@ INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
@@ -194,6 +202,9 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<DominatorTree>();
+ TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
CurAST = new AliasSetTracker(*AA);
// Collect Alias info from subloops.
for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end();
@@ -333,7 +344,7 @@ void LICM::HoistRegion(DomTreeNode *N) {
// Try constant folding this instruction. If all the operands are
// constants, it is technically hoistable, but it would be better to just
// fold it.
- if (Constant *C = ConstantFoldInstruction(&I)) {
+ if (Constant *C = ConstantFoldInstruction(&I, TD, TLI)) {
DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n');
CurAST->copyValue(&I, C);
CurAST->deleteValue(&I);
@@ -369,7 +380,7 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
// in the same alias set as something that ends up being modified.
if (AA->pointsToConstantMemory(LI->getOperand(0)))
return true;
- if (LI->getMetadata(LI->getContext().getMDKindID("invariant.load")))
+ if (LI->getMetadata("invariant.load"))
return true;
// Don't hoist loads which have may-aliased stores in loop.
@@ -581,7 +592,7 @@ void LICM::hoist(Instruction &I) {
///
bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
// If it is not a trapping instruction, it is always safe to hoist.
- if (Inst.isSafeToSpeculativelyExecute())
+ if (isSafeToSpeculativelyExecute(&Inst))
return true;
return isGuaranteedToExecute(Inst);
diff --git a/lib/Transforms/Scalar/LLVMBuild.txt b/lib/Transforms/Scalar/LLVMBuild.txt
index 027634d..cee9119 100644
--- a/lib/Transforms/Scalar/LLVMBuild.txt
+++ b/lib/Transforms/Scalar/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = Scalar
parent = Transforms
library_name = ScalarOpts
required_libraries = Analysis Core InstCombine Support Target TransformUtils
-
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index af25c5c..f0f05e6 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/Statistic.h"
@@ -43,6 +44,7 @@ namespace {
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
AU.addPreserved("scalar-evolution");
+ AU.addRequired<TargetLibraryInfo>();
}
};
}
@@ -50,6 +52,7 @@ namespace {
char LoopInstSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify",
"Simplify instructions in loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
@@ -64,6 +67,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
LoopInfo *LI = &getAnalysis<LoopInfo>();
const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SmallVector<BasicBlock*, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
@@ -104,7 +108,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Don't bother simplifying unused instructions.
if (!I->use_empty()) {
- Value *V = SimplifyInstruction(I, TD, DT);
+ Value *V = SimplifyInstruction(I, TD, TLI, DT);
if (V && LI->replacementPreservesLCSSAForm(I, V)) {
// Mark all uses for resimplification next time round the loop.
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 4ae51d5..840614e 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -77,19 +77,17 @@
#include <algorithm>
using namespace llvm;
-namespace llvm {
-cl::opt<bool> EnableNested(
+static cl::opt<bool> EnableNested(
"enable-lsr-nested", cl::Hidden, cl::desc("Enable LSR on nested loops"));
-cl::opt<bool> EnableRetry(
- "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry"));
+static cl::opt<bool> EnableRetry(
+ "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry"));
// Temporary flag to cleanup congruent phis after LSR phi expansion.
// It's currently disabled until we can determine whether it's truly useful or
// not. The flag should be removed after the v3.0 release.
-cl::opt<bool> EnablePhiElim(
- "enable-lsr-phielim", cl::Hidden, cl::desc("Enable LSR phi elimination"));
-}
+static cl::opt<bool> EnablePhiElim(
+ "enable-lsr-phielim", cl::Hidden, cl::desc("Enable LSR phi elimination"));
namespace {
@@ -636,6 +634,19 @@ static Type *getAccessType(const Instruction *Inst) {
return AccessTy;
}
+/// isExistingPhi - Return true if this AddRec is already a phi in its loop.
+static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
+ for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ if (SE.isSCEVable(PN->getType()) &&
+ (SE.getEffectiveSCEVType(PN->getType()) ==
+ SE.getEffectiveSCEVType(AR->getType())) &&
+ SE.getSCEV(PN) == AR)
+ return true;
+ }
+ return false;
+}
+
/// DeleteTriviallyDeadInstructions - If any of the instructions is the
/// specified set are trivially dead, delete them and see if this makes any of
/// their operands subsequently dead.
@@ -705,7 +716,8 @@ public:
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
- ScalarEvolution &SE, DominatorTree &DT);
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs = 0);
void print(raw_ostream &OS) const;
void dump() const;
@@ -718,7 +730,8 @@ private:
void RatePrimaryRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT);
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs);
};
}
@@ -738,18 +751,13 @@ void Cost::RateRegister(const SCEV *Reg,
// on other loops, and cannot be expected to change sibling loops. If the
// AddRec exists, consider its register free and leave it alone. Otherwise,
// do not consider this formula at all.
- // FIXME: why do we need to generate such fomulae?
else if (!EnableNested || L->contains(AR->getLoop()) ||
(!AR->getLoop()->contains(L) &&
DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) {
- for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I) {
- if (SE.isSCEVable(PN->getType()) &&
- (SE.getEffectiveSCEVType(PN->getType()) ==
- SE.getEffectiveSCEVType(AR->getType())) &&
- SE.getSCEV(PN) == AR)
- return;
- }
+ if (isExistingPhi(AR, SE))
+ return;
+
+ // For !EnableNested, never rewrite IVs in other loops.
if (!EnableNested) {
Loose();
return;
@@ -791,13 +799,22 @@ void Cost::RateRegister(const SCEV *Reg,
}
/// RatePrimaryRegister - Record this register in the set. If we haven't seen it
-/// before, rate it.
+/// before, rate it. Optional LoserRegs provides a way to declare any formula
+/// that refers to one of those regs an instant loser.
void Cost::RatePrimaryRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT) {
- if (Regs.insert(Reg))
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs) {
+ if (LoserRegs && LoserRegs->count(Reg)) {
+ Loose();
+ return;
+ }
+ if (Regs.insert(Reg)) {
RateRegister(Reg, Regs, L, SE, DT);
+ if (LoserRegs && isLoser())
+ LoserRegs->insert(Reg);
+ }
}
void Cost::RateFormula(const Formula &F,
@@ -805,14 +822,15 @@ void Cost::RateFormula(const Formula &F,
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
- ScalarEvolution &SE, DominatorTree &DT) {
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs) {
// Tally up the registers.
if (const SCEV *ScaledReg = F.ScaledReg) {
if (VisitedRegs.count(ScaledReg)) {
Loose();
return;
}
- RatePrimaryRegister(ScaledReg, Regs, L, SE, DT);
+ RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
if (isLoser())
return;
}
@@ -823,7 +841,7 @@ void Cost::RateFormula(const Formula &F,
Loose();
return;
}
- RatePrimaryRegister(BaseReg, Regs, L, SE, DT);
+ RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
if (isLoser())
return;
}
@@ -1105,7 +1123,6 @@ bool LSRUse::InsertFormula(const Formula &F) {
Formulae.push_back(F);
// Record registers now being used by this use.
- if (F.ScaledReg) Regs.insert(F.ScaledReg);
Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
return true;
@@ -1116,7 +1133,6 @@ void LSRUse::DeleteFormula(Formula &F) {
if (&F != &Formulae.back())
std::swap(F, Formulae.back());
Formulae.pop_back();
- assert(!Formulae.empty() && "LSRUse has no formulae left!");
}
/// RecomputeRegs - Recompute the Regs field, and update RegUses.
@@ -1389,7 +1405,6 @@ class LSRInstance {
LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
-public:
void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void CountRegisters(const Formula &F, size_t LUIdx);
@@ -1450,6 +1465,7 @@ public:
void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Pass *P);
+public:
LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
bool getChanged() const { return Changed; }
@@ -2045,7 +2061,8 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
do {
const SCEV *S = Worklist.pop_back_val();
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
- Strides.insert(AR->getStepRecurrence(SE));
+ if (EnableNested || AR->getLoop() == L)
+ Strides.insert(AR->getStepRecurrence(SE));
Worklist.push_back(AR->getStart());
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
Worklist.append(Add->op_begin(), Add->op_end());
@@ -2914,6 +2931,7 @@ LSRInstance::GenerateAllReuseFormulae() {
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
DenseSet<const SCEV *> VisitedRegs;
SmallPtrSet<const SCEV *, 16> Regs;
+ SmallPtrSet<const SCEV *, 16> LoserRegs;
#ifndef NDEBUG
bool ChangedFormulae = false;
#endif
@@ -2933,46 +2951,66 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
FIdx != NumForms; ++FIdx) {
Formula &F = LU.Formulae[FIdx];
- SmallVector<const SCEV *, 2> Key;
- for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
- JE = F.BaseRegs.end(); J != JE; ++J) {
- const SCEV *Reg = *J;
- if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
- Key.push_back(Reg);
+ // Some formulas are instant losers. For example, they may depend on
+ // nonexistent AddRecs from other loops. These need to be filtered
+ // immediately; otherwise heuristics could choose them over others, leading
+ // to an unsatisfactory solution. Passing LoserRegs into RateFormula here
+ // avoids the need to recompute this information across formulae using the
+ // same bad AddRec. Passing LoserRegs is also essential unless we remove
+ // the corresponding bad register from the Regs set.
+ Cost CostF;
+ Regs.clear();
+ CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT,
+ &LoserRegs);
+ if (CostF.isLoser()) {
+ // During initial formula generation, undesirable formulae are generated
+ // by uses within other loops that have some non-trivial address mode or
+ // use the postinc form of the IV. LSR needs to provide these formulae
+ // as the basis of rediscovering the desired formula that uses an AddRec
+ // corresponding to the existing phi. Once all formulae have been
+ // generated, these initial losers may be pruned.
+ DEBUG(dbgs() << " Filtering loser "; F.print(dbgs());
+ dbgs() << "\n");
}
- if (F.ScaledReg &&
- RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
- Key.push_back(F.ScaledReg);
- // Unstable sort by host order ok, because this is only used for
- // uniquifying.
- std::sort(Key.begin(), Key.end());
-
- std::pair<BestFormulaeTy::const_iterator, bool> P =
- BestFormulae.insert(std::make_pair(Key, FIdx));
- if (!P.second) {
+ else {
+ SmallVector<const SCEV *, 2> Key;
+ for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
+ JE = F.BaseRegs.end(); J != JE; ++J) {
+ const SCEV *Reg = *J;
+ if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
+ Key.push_back(Reg);
+ }
+ if (F.ScaledReg &&
+ RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
+ Key.push_back(F.ScaledReg);
+ // Unstable sort by host order ok, because this is only used for
+ // uniquifying.
+ std::sort(Key.begin(), Key.end());
+
+ std::pair<BestFormulaeTy::const_iterator, bool> P =
+ BestFormulae.insert(std::make_pair(Key, FIdx));
+ if (P.second)
+ continue;
+
Formula &Best = LU.Formulae[P.first->second];
- Cost CostF;
- CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
- Regs.clear();
Cost CostBest;
- CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
Regs.clear();
+ CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
if (CostF < CostBest)
std::swap(F, Best);
DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
dbgs() << "\n"
" in favor of formula "; Best.print(dbgs());
dbgs() << '\n');
+ }
#ifndef NDEBUG
- ChangedFormulae = true;
+ ChangedFormulae = true;
#endif
- LU.DeleteFormula(F);
- --FIdx;
- --NumForms;
- Any = true;
- continue;
- }
+ LU.DeleteFormula(F);
+ --FIdx;
+ --NumForms;
+ Any = true;
}
// Now that we've filtered out some formulae, recompute the Regs set.
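
The LoserRegs set added to RateFormula plays a caching role: once a register has been rated and found to make its formula a loser, any later formula that mentions the same register can be rejected without repeating the rating. A rough standalone sketch of that idea with toy types instead of LSR's Cost and Formula classes:

#include <iostream>
#include <set>
#include <string>
#include <vector>

// A toy "formula" is just a list of register names. The per-register rating
// is a stand-in for the real cost model; here a register is a loser when its
// name starts with "bad".
static bool ratesAsLoser(const std::string &Reg) {
  return Reg.compare(0, 3, "bad") == 0;
}

// Rate a formula, consulting and updating a cache of known-loser registers so
// the expensive per-register rating runs at most once per register.
static bool isLoserFormula(const std::vector<std::string> &Formula,
                           std::set<std::string> &LoserRegs) {
  for (const std::string &Reg : Formula) {
    if (LoserRegs.count(Reg))
      return true;                       // cached: instant loser
    if (ratesAsLoser(Reg)) {
      LoserRegs.insert(Reg);             // remember for later formulae
      return true;
    }
  }
  return false;
}

int main() {
  std::set<std::string> LoserRegs;
  std::vector<std::vector<std::string>> Formulae = {
      {"r0", "badAddRec"}, {"badAddRec", "r1"}, {"r0", "r1"}};
  for (const std::vector<std::string> &F : Formulae)
    std::cout << (isLoserFormula(F, LoserRegs) ? "filtered" : "kept") << "\n";
  return 0;
}
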
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 37f4c2c..22dbfe3 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -40,10 +40,9 @@ UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
cl::desc("Allows loops to be partially unrolled until "
"-unroll-threshold loop size is reached."));
-// Temporary flag to be removed in 3.0
static cl::opt<bool>
-NoSCEVUnroll("disable-unroll-scev", cl::init(false), cl::Hidden,
- cl::desc("Use ScalarEvolution to analyze loop trip counts for unrolling"));
+UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false), cl::Hidden,
+ cl::desc("Unroll loops with run-time trip counts"));
namespace {
class LoopUnroll : public LoopPass {
@@ -68,6 +67,10 @@ namespace {
// explicit -unroll-threshold).
static const unsigned OptSizeUnrollThreshold = 50;
+ // Default unroll count for loops with run-time trip count if
+ // -unroll-count is not set
+ static const unsigned UnrollRuntimeCount = 8;
+
unsigned CurrentCount;
unsigned CurrentThreshold;
bool CurrentAllowPartial;
@@ -148,23 +151,21 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// Find trip count and trip multiple if count is not available
unsigned TripCount = 0;
unsigned TripMultiple = 1;
- if (!NoSCEVUnroll) {
- // Find "latch trip count". UnrollLoop assumes that control cannot exit
- // via the loop latch on any iteration prior to TripCount. The loop may exit
- // early via an earlier branch.
- BasicBlock *LatchBlock = L->getLoopLatch();
- if (LatchBlock) {
- TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
- TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
- }
- }
- else {
- TripCount = L->getSmallConstantTripCount();
- if (TripCount == 0)
- TripMultiple = L->getSmallConstantTripMultiple();
+ // Find "latch trip count". UnrollLoop assumes that control cannot exit
+ // via the loop latch on any iteration prior to TripCount. The loop may exit
+ // early via an earlier branch.
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (LatchBlock) {
+ TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
+ TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
}
- // Automatically select an unroll count.
+ // Use a default unroll-count if the user doesn't specify a value
+ // and the trip count is a run-time value. The default is different
+ // for run-time or compile-time trip count loops.
unsigned Count = CurrentCount;
+ if (UnrollRuntime && CurrentCount == 0 && TripCount == 0)
+ Count = UnrollRuntimeCount;
+
if (Count == 0) {
// Conservative heuristic: if we know the trip count, see if we can
// completely unroll (subject to the threshold, checked below); otherwise
@@ -189,15 +190,23 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
if (TripCount != 1 && Size > Threshold) {
DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
<< " because size: " << Size << ">" << Threshold << "\n");
- if (!CurrentAllowPartial) {
+ if (!CurrentAllowPartial && !(UnrollRuntime && TripCount == 0)) {
DEBUG(dbgs() << " will not try to unroll partially because "
<< "-unroll-allow-partial not given\n");
return false;
}
- // Reduce unroll count to be modulo of TripCount for partial unrolling
- Count = Threshold / LoopSize;
- while (Count != 0 && TripCount%Count != 0) {
- Count--;
+ if (TripCount) {
+ // Reduce unroll count to be modulo of TripCount for partial unrolling
+ Count = CurrentThreshold / LoopSize;
+ while (Count != 0 && TripCount%Count != 0)
+ Count--;
+ }
+ else if (UnrollRuntime) {
+ // Reduce unroll count to be a lower power-of-two value
+ while (Count != 0 && Size > CurrentThreshold) {
+ Count >>= 1;
+ Size = LoopSize*Count;
+ }
}
if (Count < 2) {
DEBUG(dbgs() << " could not unroll partially\n");
@@ -208,7 +217,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// Unroll the loop.
- if (!UnrollLoop(L, Count, TripCount, TripMultiple, LI, &LPM))
+ if (!UnrollLoop(L, Count, TripCount, UnrollRuntime, TripMultiple, LI, &LPM))
return false;
return true;
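
The count selection above boils down to: start from the user-specified count, fall back to UnrollRuntimeCount (8) when the trip count is only known at run time, and if the unrolled body would exceed the threshold either shrink the count to a divisor of a known trip count (partial unrolling) or halve it until it fits (runtime unrolling). A compressed standalone sketch of that arithmetic; the full-unroll path and the Count < 2 bail-out are omitted:

#include <iostream>

// TripCount == 0 means the trip count is only known at run time.
static unsigned pickUnrollCount(unsigned UserCount, unsigned TripCount,
                                unsigned LoopSize, unsigned Threshold,
                                bool RuntimeUnroll) {
  const unsigned UnrollRuntimeCount = 8;   // default for run-time trip counts
  unsigned Count = UserCount;
  if (RuntimeUnroll && Count == 0 && TripCount == 0)
    Count = UnrollRuntimeCount;
  if (Count == 0)
    Count = TripCount;                     // try to unroll by the trip count

  unsigned Size = LoopSize * Count;
  if (TripCount != 1 && Size > Threshold) {
    if (TripCount) {
      // Partial unroll: largest count that fits and divides the trip count.
      Count = Threshold / LoopSize;
      while (Count != 0 && TripCount % Count != 0)
        --Count;
    } else if (RuntimeUnroll) {
      // Runtime unroll: halve the count until the unrolled size fits.
      while (Count != 0 && Size > Threshold) {
        Count >>= 1;
        Size = LoopSize * Count;
      }
    }
  }
  return Count;
}

int main() {
  // 20-instruction loop, run-time trip count, threshold 150: 8 is halved to 4.
  std::cout << pickUnrollCount(0, 0, 20, 150, true) << "\n";
  // Known trip count of 10, threshold 60: partial unroll by 2.
  std::cout << pickUnrollCount(0, 10, 20, 60, true) << "\n";
  return 0;
}
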
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 458949c..a2d0e98 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -71,7 +71,9 @@ namespace {
// LoopProcessWorklist - Used to check if second loop needs processing
// after RewriteLoopBodyWithConditionConstant rewrites first loop.
std::vector<Loop*> LoopProcessWorklist;
- SmallPtrSet<Value *,8> UnswitchedVals;
+
+ // FIXME: Consider custom class for this.
+ std::map<const SwitchInst*, SmallPtrSet<const Value *,8> > UnswitchedVals;
bool OptimizeForSize;
bool redoLoop;
@@ -117,7 +119,15 @@ namespace {
private:
virtual void releaseMemory() {
- UnswitchedVals.clear();
+ // We need to forget about all switches in the current loop.
+ // FIXME: Do this better than enumerating all blocks and checking whether
+ // each block's terminator is a switch instruction.
+ for (Loop::block_iterator I = currentLoop->block_begin(),
+ E = currentLoop->block_end(); I != E; ++I) {
+ SwitchInst* SI = dyn_cast<SwitchInst>((*I)->getTerminator());
+ if (SI)
+ UnswitchedVals.erase(SI);
+ }
}
/// RemoveLoopFromWorklist - If the specified loop is on the loop worklist,
@@ -128,6 +138,12 @@ namespace {
if (I != LoopProcessWorklist.end())
LoopProcessWorklist.erase(I);
}
+
+ /// For new loop switches we clone info about values that were
+ /// already unswitched and have redundant successors.
+ /// Note that the new loop data is stored inside the VMap.
+ void CloneUnswitchedVals(const ValueToValueMapTy& VMap,
+ const BasicBlock* SrcBB);
void initLoopData() {
loopHeader = currentLoop->getHeader();
@@ -255,13 +271,25 @@ bool LoopUnswitch::processCurrentLoop() {
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
currentLoop, Changed);
- if (LoopCond && SI->getNumCases() > 1) {
+ unsigned NumCases = SI->getNumCases();
+ if (LoopCond && NumCases > 1) {
// Find a value to unswitch on:
// FIXME: this should choose the most expensive case!
// FIXME: scan for a case with a non-critical edge?
- Constant *UnswitchVal = SI->getCaseValue(1);
+ Constant *UnswitchVal = NULL;
+
// Do not process same value again and again.
- if (!UnswitchedVals.insert(UnswitchVal))
+ // At this point we have some cases already unswitched and
+ // some not yet unswitched. Let's find the first not yet unswitched one.
+ for (unsigned i = 1; i < NumCases; ++i) {
+ Constant* UnswitchValCandidate = SI->getCaseValue(i);
+ if (!UnswitchedVals[SI].count(UnswitchValCandidate)) {
+ UnswitchVal = UnswitchValCandidate;
+ break;
+ }
+ }
+
+ if (!UnswitchVal)
continue;
if (UnswitchIfProfitable(LoopCond, UnswitchVal)) {
@@ -287,6 +315,23 @@ bool LoopUnswitch::processCurrentLoop() {
return Changed;
}
+/// For new loop switches we clone info about values that were
+/// already unswitched and have redundant successors.
+/// Note that the new loop data is stored inside the VMap.
+void LoopUnswitch::CloneUnswitchedVals(const ValueToValueMapTy& VMap,
+ const BasicBlock* SrcBB) {
+
+ const SwitchInst* SI = dyn_cast<SwitchInst>(SrcBB->getTerminator());
+ if (SI && UnswitchedVals.count(SI)) {
+ // Don't clone a totally simplified switch.
+ if (isa<Constant>(SI->getCondition()))
+ return;
+ Value* I = VMap.lookup(SI);
+ assert(I && "All instructions that are in SrcBB must be in VMap.");
+ UnswitchedVals[cast<SwitchInst>(I)] = UnswitchedVals[SI];
+ }
+}
+
/// isTrivialLoopExitBlock - Check to see if all paths from BB exit the
/// loop with no side effects (including infinite loops).
///
@@ -378,14 +423,25 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
// Check to see if a successor of the switch is guaranteed to go to the
// latch block or exit through a one exit block without having any
// side-effects. If so, determine the value of Cond that causes it to do
- // this. Note that we can't trivially unswitch on the default case.
- for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
- if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ // this.
+ // Note that we can't trivially unswitch on the default case or
+ // on already unswitched cases.
+ for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock* LoopExitCandidate;
+ if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop,
SI->getSuccessor(i)))) {
// Okay, we found a trivial case, remember the value that is trivial.
- if (Val) *Val = SI->getCaseValue(i);
+ ConstantInt* CaseVal = SI->getCaseValue(i);
+
+ // Check that it was not unswitched before, since already-unswitched
+ // trivial values still look trivial.
+ if (UnswitchedVals[SI].count(CaseVal))
+ continue;
+ LoopExitBB = LoopExitCandidate;
+ if (Val) *Val = CaseVal;
break;
}
+ }
}
// If we didn't find a single unique LoopExit block, or if the loop exit block
@@ -447,8 +503,14 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
// expansion, and the number of basic blocks, to avoid loops with
// large numbers of branches which cause loop unswitching to go crazy.
// This is a very ad-hoc heuristic.
- if (Metrics.NumInsts > Threshold ||
- Metrics.NumBlocks * 5 > Threshold ||
+
+ unsigned NumUnswitched =
+ (NumSwitches + NumBranches) + 1 /*take into account the current iteration*/;
+
+ unsigned NumInsts = Metrics.NumInsts * NumUnswitched;
+ unsigned NumBlocks = Metrics.NumBlocks * NumUnswitched;
+
+ if (NumInsts > Threshold || NumBlocks * 5 > Threshold ||
Metrics.containsIndirectBr || Metrics.isRecursive) {
DEBUG(dbgs() << "NOT unswitching loop %"
<< currentLoop->getHeader()->getName() << ", cost too high: "
@@ -565,8 +627,7 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
// Although SplitBlockPredecessors doesn't preserve loop-simplify in
// general, if we call it on all predecessors of all exits then it does.
if (!ExitBlock->isLandingPad()) {
- SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(),
- ".us-lcssa", this);
+ SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", this);
} else {
SmallVector<BasicBlock*, 2> NewBBs;
SplitLandingPadPredecessors(ExitBlock, Preds, ".us-lcssa", ".us-lcssa",
@@ -621,6 +682,12 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
ValueToValueMapTy VMap;
for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
+
+ // Inherit simplified switches info for NewBB
+ // We needn't pass NewBB since its instructions are already contained
+ // inside the VMap.
+ CloneUnswitchedVals(VMap, LoopBlocks[i]);
+
NewBlocks.push_back(NewBB);
VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping.
LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
@@ -907,9 +974,13 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
Instruction *U = dyn_cast<Instruction>(*UI);
if (!U || !L->contains(U))
continue;
- U->replaceUsesOfWith(LIC, Replacement);
Worklist.push_back(U);
}
+
+ for (std::vector<Instruction*>::iterator UI = Worklist.begin();
+ UI != Worklist.end(); ++UI)
+ (*UI)->replaceUsesOfWith(LIC, Replacement);
+
SimplifyCode(Worklist, L);
return;
}
@@ -942,6 +1013,9 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
BasicBlock *Switch = SI->getParent();
BasicBlock *SISucc = SI->getSuccessor(DeadCase);
BasicBlock *Latch = L->getLoopLatch();
+
+ UnswitchedVals[SI].insert(Val);
+
if (!SI->findCaseDest(SISucc)) continue; // Edge is critical.
// If the DeadCase successor dominates the loop latch, then the
// transformation isn't safe since it will delete the sole predecessor edge
@@ -1017,7 +1091,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
// See if instruction simplification can hack this up. This is common for
// things like "select false, X, Y" after unswitching made the condition be
// 'false'.
- if (Value *V = SimplifyInstruction(I, 0, DT))
+ if (Value *V = SimplifyInstruction(I, 0, 0, DT))
if (LI->replacementPreservesLCSSAForm(I, V)) {
ReplaceUsesOfWith(I, V, Worklist, L, LPM);
continue;
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 9e4f51f..7335626 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -147,8 +147,8 @@ struct MemsetRange {
} // end anon namespace
bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
- // If we found more than 8 stores to merge or 64 bytes, use memset.
- if (TheStores.size() >= 8 || End-Start >= 64) return true;
+ // If we found more than 4 stores to merge or 16 bytes, use memset.
+ if (TheStores.size() >= 4 || End-Start >= 16) return true;
// If there is nothing to merge, don't do anything.
if (TheStores.size() < 2) return false;
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index 80f5f01..8e9449f 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -179,9 +179,13 @@ static bool IsPotentialUse(const Value *Op) {
Arg->hasNestAttr() ||
Arg->hasStructRetAttr())
return false;
- // Only consider values with pointer types, and not function pointers.
+ // Only consider values with pointer types.
+ // It seems intuitive to exclude function pointer types as well, since
+ // functions are never reference-counted; however, clang occasionally
+ // bitcasts reference-counted pointers to function-pointer type
+ // temporarily.
PointerType *Ty = dyn_cast<PointerType>(Op->getType());
- if (!Ty || isa<FunctionType>(Ty->getElementType()))
+ if (!Ty)
return false;
// Conservatively assume anything else is a potential use.
return true;
@@ -896,8 +900,9 @@ bool ObjCARCExpand::runOnFunction(Function &F) {
#include "llvm/LLVMContext.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/CFG.h"
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/DenseSet.h"
STATISTIC(NumNoops, "Number of no-op objc calls eliminated");
STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
@@ -1165,6 +1170,7 @@ namespace {
/// Partial - True if we've seen an opportunity for partial RR elimination,
/// such as pushing calls into a CFG triangle or into one side of a
/// CFG diamond.
+ /// TODO: Consider moving this to PtrState.
bool Partial;
/// ReleaseMetadata - If the Calls are objc_release calls and they all have
@@ -1251,16 +1257,6 @@ namespace {
Seq = NewSeq;
}
- void SetSeqToRelease(MDNode *M) {
- if (Seq == S_None || Seq == S_Use) {
- Seq = M ? S_MovableRelease : S_Release;
- RRI.ReleaseMetadata = M;
- } else if (Seq != S_MovableRelease || RRI.ReleaseMetadata != M) {
- Seq = S_Release;
- RRI.ReleaseMetadata = 0;
- }
- }
-
Sequence GetSeq() const {
return Seq;
}
@@ -1488,7 +1484,7 @@ namespace {
/// metadata.
unsigned ImpreciseReleaseMDKind;
- /// CopyOnEscape - The Metadata Kind for clang.arc.copy_on_escape
+ /// CopyOnEscapeMDKind - The Metadata Kind for clang.arc.copy_on_escape
/// metadata.
unsigned CopyOnEscapeMDKind;
@@ -2255,6 +2251,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
// guards against loops in the middle of a sequence.
if (SomeSuccHasSame && !AllSuccsHaveSame)
S.ClearSequenceProgress();
+ break;
}
case S_CanRelease: {
const Value *Arg = I->first;
@@ -2289,6 +2286,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
// guards against loops in the middle of a sequence.
if (SomeSuccHasSame && !AllSuccsHaveSame)
S.ClearSequenceProgress();
+ break;
}
}
}
@@ -2350,8 +2348,11 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease)
NestingDetected = true;
- S.SetSeqToRelease(Inst->getMetadata(ImpreciseReleaseMDKind));
S.RRI.clear();
+
+ MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+ S.SetSeq(ReleaseMetadata ? S_MovableRelease : S_Release);
+ S.RRI.ReleaseMetadata = ReleaseMetadata;
S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented();
S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
S.RRI.Calls.insert(Inst);
@@ -2494,18 +2495,16 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
if (Pred == BB)
continue;
DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
- assert(I != BBStates.end());
// If we haven't seen this node yet, then we've found a CFG cycle.
// Be optimistic here; it's CheckForCFGHazards' job to detect trouble.
- if (!I->second.isVisitedTopDown())
+ if (I == BBStates.end() || !I->second.isVisitedTopDown())
continue;
MyStates.InitFromPred(I->second);
while (PI != PE) {
Pred = *PI++;
if (Pred != BB) {
I = BBStates.find(Pred);
- assert(I != BBStates.end());
- if (I->second.isVisitedTopDown())
+ if (I != BBStates.end() && I->second.isVisitedTopDown())
MyStates.MergePred(I->second);
}
}
@@ -2661,49 +2660,106 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
return NestingDetected;
}
-// Visit - Visit the function both top-down and bottom-up.
-bool
-ObjCARCOpt::Visit(Function &F,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases) {
- // Use reverse-postorder on the reverse CFG for bottom-up, because we
- // magically know that loops will be well behaved, i.e. they won't repeatedly
- // call retain on a single pointer without doing a release. We can't use
- // ReversePostOrderTraversal here because we want to walk up from each
- // function exit point.
+static void
+ComputePostOrders(Function &F,
+ SmallVectorImpl<BasicBlock *> &PostOrder,
+ SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder) {
+ /// Backedges - Backedges detected in the DFS. These edges will be
+ /// ignored in the reverse-CFG DFS, so that loops with multiple exits will be
+ /// traversed in the desired order.
+ DenseSet<std::pair<BasicBlock *, BasicBlock *> > Backedges;
+
+ /// Visited - The visited set, for doing DFS walks.
SmallPtrSet<BasicBlock *, 16> Visited;
- SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> Stack;
- SmallVector<BasicBlock *, 16> Order;
+
+ // Do DFS, computing the PostOrder.
+ SmallPtrSet<BasicBlock *, 16> OnStack;
+ SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack;
+ BasicBlock *EntryBB = &F.getEntryBlock();
+ SuccStack.push_back(std::make_pair(EntryBB, succ_begin(EntryBB)));
+ Visited.insert(EntryBB);
+ OnStack.insert(EntryBB);
+ do {
+ dfs_next_succ:
+ succ_iterator End = succ_end(SuccStack.back().first);
+ while (SuccStack.back().second != End) {
+ BasicBlock *BB = *SuccStack.back().second++;
+ if (Visited.insert(BB)) {
+ SuccStack.push_back(std::make_pair(BB, succ_begin(BB)));
+ OnStack.insert(BB);
+ goto dfs_next_succ;
+ }
+ if (OnStack.count(BB))
+ Backedges.insert(std::make_pair(SuccStack.back().first, BB));
+ }
+ OnStack.erase(SuccStack.back().first);
+ PostOrder.push_back(SuccStack.pop_back_val().first);
+ } while (!SuccStack.empty());
+
+ Visited.clear();
+
+ // Compute the exits, which are the starting points for reverse-CFG DFS.
+ SmallVector<BasicBlock *, 4> Exits;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
BasicBlock *BB = I;
if (BB->getTerminator()->getNumSuccessors() == 0)
- Stack.push_back(std::make_pair(BB, pred_begin(BB)));
+ Exits.push_back(BB);
}
- while (!Stack.empty()) {
- pred_iterator End = pred_end(Stack.back().first);
- while (Stack.back().second != End) {
- BasicBlock *BB = *Stack.back().second++;
- if (Visited.insert(BB))
- Stack.push_back(std::make_pair(BB, pred_begin(BB)));
- }
- Order.push_back(Stack.pop_back_val().first);
+
+ // Do reverse-CFG DFS, computing the reverse-CFG PostOrder.
+ SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> PredStack;
+ for (SmallVectorImpl<BasicBlock *>::iterator I = Exits.begin(), E = Exits.end();
+ I != E; ++I) {
+ BasicBlock *ExitBB = *I;
+ PredStack.push_back(std::make_pair(ExitBB, pred_begin(ExitBB)));
+ Visited.insert(ExitBB);
+ while (!PredStack.empty()) {
+ reverse_dfs_next_succ:
+ pred_iterator End = pred_end(PredStack.back().first);
+ while (PredStack.back().second != End) {
+ BasicBlock *BB = *PredStack.back().second++;
+ // Skip backedges detected in the forward-CFG DFS.
+ if (Backedges.count(std::make_pair(BB, PredStack.back().first)))
+ continue;
+ if (Visited.insert(BB)) {
+ PredStack.push_back(std::make_pair(BB, pred_begin(BB)));
+ goto reverse_dfs_next_succ;
+ }
+ }
+ ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first);
+ }
}
+}
+
+// Visit - Visit the function both top-down and bottom-up.
+bool
+ObjCARCOpt::Visit(Function &F,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases) {
+
+ // Use reverse-postorder traversals, because we magically know that loops
+ // will be well behaved, i.e. they won't repeatedly call retain on a single
+ // pointer without doing a release. We can't use the ReversePostOrderTraversal
+ // class here because we want the reverse-CFG postorder to consider each
+ // function exit point, and we want to ignore selected cycle edges.
+ SmallVector<BasicBlock *, 16> PostOrder;
+ SmallVector<BasicBlock *, 16> ReverseCFGPostOrder;
+ ComputePostOrders(F, PostOrder, ReverseCFGPostOrder);
+
+ // Use reverse-postorder on the reverse CFG for bottom-up.
bool BottomUpNestingDetected = false;
for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
- Order.rbegin(), E = Order.rend(); I != E; ++I) {
- BasicBlock *BB = *I;
- BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains);
- }
+ ReverseCFGPostOrder.rbegin(), E = ReverseCFGPostOrder.rend();
+ I != E; ++I)
+ BottomUpNestingDetected |= VisitBottomUp(*I, BBStates, Retains);
- // Use regular reverse-postorder for top-down.
+ // Use reverse-postorder for top-down.
bool TopDownNestingDetected = false;
- typedef ReversePostOrderTraversal<Function *> RPOTType;
- RPOTType RPOT(&F);
- for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
- BasicBlock *BB = *I;
- TopDownNestingDetected |= VisitTopDown(BB, BBStates, Releases);
- }
+ for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
+ PostOrder.rbegin(), E = PostOrder.rend();
+ I != E; ++I)
+ TopDownNestingDetected |= VisitTopDown(*I, BBStates, Releases);
return TopDownNestingDetected && BottomUpNestingDetected;
}
@@ -3139,7 +3195,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
UE = Alloca->use_end(); UI != UE; ) {
CallInst *UserInst = cast<CallInst>(*UI++);
if (!UserInst->use_empty())
- UserInst->replaceAllUsesWith(UserInst->getOperand(1));
+ UserInst->replaceAllUsesWith(UserInst->getArgOperand(0));
UserInst->eraseFromParent();
}
Alloca->eraseFromParent();
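
ComputePostOrders above runs two iterative DFS walks: a forward walk from the entry block that records a postorder and the set of backedges (edges into a block still on the DFS stack), then a reverse-CFG walk started from every exit block that skips those backedges, so loops with multiple exits are still traversed in a useful order. The same structure on a small adjacency-list graph, as a standalone sketch with generic containers rather than the LLVM CFG iterators:

#include <cstddef>
#include <iostream>
#include <set>
#include <utility>
#include <vector>

using Graph = std::vector<std::vector<int>>;   // Graph[v] = adjacency list

// Forward DFS from Entry: fills PostOrder and records back edges.
static void forwardDFS(const Graph &Succs, int Entry,
                       std::vector<int> &PostOrder,
                       std::set<std::pair<int, int>> &Backedges) {
  std::vector<char> Visited(Succs.size(), 0), OnStack(Succs.size(), 0);
  // Each stack entry is (node, index of the next successor to try).
  std::vector<std::pair<int, std::size_t>> Stack{{Entry, 0}};
  Visited[Entry] = OnStack[Entry] = 1;
  while (!Stack.empty()) {
    auto &[V, NextIdx] = Stack.back();
    if (NextIdx < Succs[V].size()) {
      int S = Succs[V][NextIdx++];
      if (!Visited[S]) {
        Visited[S] = OnStack[S] = 1;
        Stack.push_back({S, 0});
      } else if (OnStack[S]) {
        Backedges.insert({V, S});          // edge into the active DFS stack
      }
    } else {
      OnStack[V] = 0;
      PostOrder.push_back(V);
      Stack.pop_back();
    }
  }
}

// Reverse-CFG DFS from each exit, ignoring the recorded back edges.
static void reverseDFS(const Graph &Preds, const std::vector<int> &Exits,
                       const std::set<std::pair<int, int>> &Backedges,
                       std::vector<int> &RevPostOrder) {
  std::vector<char> Visited(Preds.size(), 0);
  for (int Exit : Exits) {
    if (Visited[Exit])
      continue;
    Visited[Exit] = 1;
    std::vector<std::pair<int, std::size_t>> Stack{{Exit, 0}};
    while (!Stack.empty()) {
      auto &[V, NextIdx] = Stack.back();
      if (NextIdx < Preds[V].size()) {
        int P = Preds[V][NextIdx++];
        if (Backedges.count({P, V}) || Visited[P])
          continue;                        // skip backedges and visited blocks
        Visited[P] = 1;
        Stack.push_back({P, 0});
      } else {
        RevPostOrder.push_back(V);
        Stack.pop_back();
      }
    }
  }
}

int main() {
  // CFG: 0 -> 1 -> 2 -> 1 (loop backedge), 2 -> 3 (exit).
  Graph Succs = {{1}, {2}, {1, 3}, {}};
  Graph Preds = {{}, {0, 2}, {1}, {2}};
  std::vector<int> PostOrder, RevPostOrder;
  std::set<std::pair<int, int>> Backedges;
  forwardDFS(Succs, 0, PostOrder, Backedges);
  reverseDFS(Preds, {3}, Backedges, RevPostOrder);
  for (int V : PostOrder) std::cout << V << ' ';      // 3 2 1 0
  std::cout << '\n';
  for (int V : RevPostOrder) std::cout << V << ' ';   // 0 1 2 3
  std::cout << '\n';
  return 0;
}
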
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index f6762ad..e4cb55c 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -28,6 +28,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -156,6 +157,7 @@ namespace {
///
class SCCPSolver : public InstVisitor<SCCPSolver> {
const TargetData *TD;
+ const TargetLibraryInfo *TLI;
SmallPtrSet<BasicBlock*, 8> BBExecutable; // The BBs that are executable.
DenseMap<Value*, LatticeVal> ValueState; // The state each value is in.
@@ -206,7 +208,8 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
typedef std::pair<BasicBlock*, BasicBlock*> Edge;
DenseSet<Edge> KnownFeasibleEdges;
public:
- SCCPSolver(const TargetData *td) : TD(td) {}
+ SCCPSolver(const TargetData *td, const TargetLibraryInfo *tli)
+ : TD(td), TLI(tli) {}
/// MarkBlockExecutable - This method can be used by clients to mark all of
/// the blocks that are known to be intrinsically live in the processed unit.
@@ -1125,7 +1128,7 @@ CallOverdefined:
// If we can constant fold this, mark the result of the call as a
// constant.
- if (Constant *C = ConstantFoldCall(F, Operands))
+ if (Constant *C = ConstantFoldCall(F, Operands, TLI))
return markConstant(I, C);
}
@@ -1517,6 +1520,9 @@ namespace {
/// Sparse Conditional Constant Propagator.
///
struct SCCP : public FunctionPass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfo>();
+ }
static char ID; // Pass identification, replacement for typeid
SCCP() : FunctionPass(ID) {
initializeSCCPPass(*PassRegistry::getPassRegistry());
@@ -1569,7 +1575,9 @@ static void DeleteInstructionInBlock(BasicBlock *BB) {
//
bool SCCP::runOnFunction(Function &F) {
DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n");
- SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ SCCPSolver Solver(TD, TLI);
// Mark the first block of the function as being executable.
Solver.MarkBlockExecutable(F.begin());
@@ -1641,6 +1649,9 @@ namespace {
/// Constant Propagation.
///
struct IPSCCP : public ModulePass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfo>();
+ }
static char ID;
IPSCCP() : ModulePass(ID) {
initializeIPSCCPPass(*PassRegistry::getPassRegistry());
@@ -1650,7 +1661,11 @@ namespace {
} // end anonymous namespace
char IPSCCP::ID = 0;
-INITIALIZE_PASS(IPSCCP, "ipsccp",
+INITIALIZE_PASS_BEGIN(IPSCCP, "ipsccp",
+ "Interprocedural Sparse Conditional Constant Propagation",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(IPSCCP, "ipsccp",
"Interprocedural Sparse Conditional Constant Propagation",
false, false)
@@ -1689,7 +1704,9 @@ static bool AddressIsTaken(const GlobalValue *GV) {
}
bool IPSCCP::runOnModule(Module &M) {
- SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ SCCPSolver Solver(TD, TLI);
// AddressTakenFunctions - This set keeps track of the address-taken functions
// that are in the input. As IPSCCP runs through and simplifies code,
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 4b14efc..bc70c51 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -453,6 +453,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
+ if (!GEP->getPointerOperandType()->isPointerTy())
+ return false;
uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
Indices);
// See if all uses can be converted.
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 6e169de..f3184ec 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -999,7 +999,7 @@ struct FFSOpt : public LibCallOptimization {
Type *ArgType = Op->getType();
Value *F = Intrinsic::getDeclaration(Callee->getParent(),
Intrinsic::cttz, ArgType);
- Value *V = B.CreateCall(F, Op, "cttz");
+ Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz");
V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
V = B.CreateIntCast(V, B.getInt32Ty(), false);
@@ -1293,7 +1293,8 @@ struct FWriteOpt : public LibCallOptimization {
return ConstantInt::get(CI->getType(), 0);
// If this is writing one byte, turn it into fputc.
- if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F)
+ // This optimisation is only valid if the return value is unused.
+ if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
EmitFPutC(Char, CI->getArgOperand(3), B, TD);
return ConstantInt::get(CI->getType(), 1);
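
The added use_empty() check matters because the two calls report results differently: fwrite(S,1,1,F) returns the number of complete items written (0 or 1), while the rewritten code would hand any reader of the result the constant 1 whether or not fputc succeeded. A small self-contained illustration of the return values involved, using a scratch tmpfile stream (both calls succeed here; the comments note the failure behaviour):

#include <cstdio>

int main() {
  std::FILE *F = std::tmpfile();        // writable scratch stream
  if (!F)
    return 1;

  const char S[] = "x";

  // Original form: fwrite returns the number of complete items written,
  // i.e. 0 or 1 here. A caller may branch on that value.
  std::size_t NItems = std::fwrite(S, 1, 1, F);

  // Simplified form: fputc returns the character written, or EOF on error.
  // Folding the call's result to the constant 1 is only safe when nothing
  // reads it, which is exactly what the use_empty() guard checks.
  int PutRes = std::fputc(S[0], F);

  std::printf("fwrite -> %zu, fputc -> %d\n", NItems, PutRes);
  std::fclose(F);
  return 0;
}
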
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index c83f56c..ef65c0a 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CFG.h"
@@ -240,7 +241,7 @@ bool Sinking::SinkInstruction(Instruction *Inst,
if (SuccToSinkTo->getUniquePredecessor() != ParentBlock) {
// We cannot sink a load across a critical edge - there may be stores in
// other code paths.
- if (!Inst->isSafeToSpeculativelyExecute()) {
+ if (!isSafeToSpeculativelyExecute(Inst)) {
DEBUG(dbgs() << " *** PUNTING: Wont sink load along critical edge.\n");
return false;
}
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp
index 8e5a1eb..d831452 100644
--- a/lib/Transforms/Utils/AddrModeMatcher.cpp
+++ b/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -473,14 +473,7 @@ bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
// Check to see if this value is already used in the memory instruction's
// block. If so, it's already live into the block at the very least, so we
// can reasonably fold it.
- BasicBlock *MemBB = MemoryInst->getParent();
- for (Value::use_iterator UI = Val->use_begin(), E = Val->use_end();
- UI != E; ++UI)
- // We know that uses of arguments and instructions have to be instructions.
- if (cast<Instruction>(*UI)->getParent() == MemBB)
- return true;
-
- return false;
+ return Val->isUsedInBasicBlock(MemoryInst->getParent());
}
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index a7f9efd..ef4a473 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -453,9 +453,8 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
/// of the edges being split is an exit of a loop with other exits).
///
BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
- BasicBlock *const *Preds,
- unsigned NumPreds, const char *Suffix,
- Pass *P) {
+ ArrayRef<BasicBlock*> Preds,
+ const char *Suffix, Pass *P) {
// Create new basic block, insert right before the original block.
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
BB->getParent(), BB);
@@ -464,7 +463,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
BranchInst *BI = BranchInst::Create(BB, NewBB);
// Move the edges from Preds to point to NewBB instead of BB.
- for (unsigned i = 0; i != NumPreds; ++i) {
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
// This is slightly more strict than necessary; the minimum requirement
// is that there be no more than one indirectbr branching to BB. And
// all BlockAddress uses would need to be updated.
@@ -477,7 +476,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// node becomes an incoming value for BB's phi node. However, if the Preds
// list is empty, we need to insert dummy entries into the PHI nodes in BB to
// account for the newly created predecessor.
- if (NumPreds == 0) {
+ if (Preds.size() == 0) {
// Insert dummy values as the incoming value.
for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)
cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
@@ -486,12 +485,10 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// Update DominatorTree, LoopInfo, and LCCSA analysis information.
bool HasLoopExit = false;
- UpdateAnalysisInformation(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds),
- P, HasLoopExit);
+ UpdateAnalysisInformation(BB, NewBB, Preds, P, HasLoopExit);
// Update the PHI nodes in BB with the values coming from NewBB.
- UpdatePHINodes(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds), BI,
- P, HasLoopExit);
+ UpdatePHINodes(BB, NewBB, Preds, BI, P, HasLoopExit);
return NewBB;
}
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index c052910..f752d79 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -372,8 +372,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// form, which we're in the process of restoring!
if (!Preds.empty() && HasPredOutsideOfLoop) {
BasicBlock *NewExitBB =
- SplitBlockPredecessors(Exit, Preds.data(), Preds.size(),
- "split", P);
+ SplitBlockPredecessors(Exit, Preds, "split", P);
if (P->mustPreserveAnalysisID(LCSSAID))
CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit);
}
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 6d5432d..d96f59c 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -14,6 +14,7 @@ add_llvm_library(LLVMTransformUtils
Local.cpp
LoopSimplify.cpp
LoopUnroll.cpp
+ LoopUnrollRuntime.cpp
LowerExpectIntrinsic.cpp
LowerInvoke.cpp
LowerSwitch.cpp
@@ -28,11 +29,3 @@ add_llvm_library(LLVMTransformUtils
Utils.cpp
ValueMapper.cpp
)
-
-add_llvm_library_dependencies(LLVMTransformUtils
- LLVMAnalysis
- LLVMCore
- LLVMSupport
- LLVMTarget
- LLVMipa
- )
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index dd4a659..f89e1b1 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -924,40 +924,44 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
return false;
}
- // Find the personality function used by the landing pads of the caller. If it
- // exists, then check to see that it matches the personality function used in
- // the callee.
- for (Function::const_iterator
- I = Caller->begin(), E = Caller->end(); I != E; ++I)
+ // Get the personality function from the callee if it contains a landing pad.
+ Value *CalleePersonality = 0;
+ for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end();
+ I != E; ++I)
if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
const BasicBlock *BB = II->getUnwindDest();
- // FIXME: This 'isa' here should become go away once the new EH system is
- // in place.
- if (!isa<LandingPadInst>(BB->getFirstNonPHI()))
- continue;
- const LandingPadInst *LP = cast<LandingPadInst>(BB->getFirstNonPHI());
- const Value *CallerPersFn = LP->getPersonalityFn();
-
- // If the personality functions match, then we can perform the
- // inlining. Otherwise, we can't inline.
- // TODO: This isn't 100% true. Some personality functions are proper
- // supersets of others and can be used in place of the other.
- for (Function::const_iterator
- I = CalledFunc->begin(), E = CalledFunc->end(); I != E; ++I)
- if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
- const BasicBlock *BB = II->getUnwindDest();
- // FIXME: This 'if/dyn_cast' here should become a normal 'cast' once
- // the new EH system is in place.
- if (const LandingPadInst *LP =
- dyn_cast<LandingPadInst>(BB->getFirstNonPHI()))
- if (CallerPersFn != LP->getPersonalityFn())
- return false;
- break;
- }
-
+ // FIXME: This 'if/dyn_cast' here should become a normal 'cast' once
+ // the new EH system is in place.
+ if (const LandingPadInst *LP =
+ dyn_cast<LandingPadInst>(BB->getFirstNonPHI()))
+ CalleePersonality = LP->getPersonalityFn();
break;
}
+ // Find the personality function used by the landing pads of the caller. If it
+ // exists, then check to see that it matches the personality function used in
+ // the callee.
+ if (CalleePersonality)
+ for (Function::const_iterator I = Caller->begin(), E = Caller->end();
+ I != E; ++I)
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
+ const BasicBlock *BB = II->getUnwindDest();
+ // FIXME: This 'isa' here should go away once the new EH system
+ // is in place.
+ if (!isa<LandingPadInst>(BB->getFirstNonPHI()))
+ continue;
+ const LandingPadInst *LP = cast<LandingPadInst>(BB->getFirstNonPHI());
+
+ // If the personality functions match, then we can perform the
+ // inlining. Otherwise, we can't inline.
+ // TODO: This isn't 100% true. Some personality functions are proper
+ // supersets of others and can be used in place of the other.
+ if (LP->getPersonalityFn() != CalleePersonality)
+ return false;
+
+ break;
+ }
+
// Get an iterator to the last basic block in the function, which will have
// the new function inlined after it.
//
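
The reordered check first extracts the callee's personality function, if it has any landing pads at all, and only then scans the caller, so callers without invokes pay nothing and the direction of the comparison is explicit. A compact sketch of the same control flow over toy data; "no personality" is encoded as an empty string, and these are not the LLVM EH classes:

#include <iostream>
#include <string>
#include <vector>

// Each function is modelled as the list of personality functions named by
// its landing pads (an empty list means no landing pads).
using LandingPads = std::vector<std::string>;

// Mirror of the inlining legality check: if the callee uses a personality,
// every landing pad in the caller must use the same one.
static bool personalitiesAllowInlining(const LandingPads &Caller,
                                       const LandingPads &Callee) {
  std::string CalleePersonality;
  if (!Callee.empty())
    CalleePersonality = Callee.front();   // callee's personality, if any
  if (CalleePersonality.empty())
    return true;                          // callee has no landing pads
  for (const std::string &P : Caller)
    if (P != CalleePersonality)
      return false;                       // mismatched personalities
  return true;
}

int main() {
  std::cout << std::boolalpha
            << personalitiesAllowInlining({"__gxx_personality_v0"},
                                          {"__gxx_personality_v0"}) << "\n"   // true
            << personalitiesAllowInlining({"__gxx_personality_v0"},
                                          {"__C_specific_handler"}) << "\n";  // false
  return 0;
}
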
diff --git a/lib/Transforms/Utils/LLVMBuild.txt b/lib/Transforms/Utils/LLVMBuild.txt
index dea7b02..88b2ffe 100644
--- a/lib/Transforms/Utils/LLVMBuild.txt
+++ b/lib/Transforms/Utils/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = TransformUtils
parent = Transforms
required_libraries = Analysis Core IPA Support Target
-
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 134ab71..4dd93cf 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -494,22 +494,8 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
if (Succ->getSinglePredecessor()) return true;
// Make a list of the predecessors of BB
- typedef SmallPtrSet<BasicBlock*, 16> BlockSet;
- BlockSet BBPreds(pred_begin(BB), pred_end(BB));
-
- // Use that list to make another list of common predecessors of BB and Succ
- BlockSet CommonPreds;
- for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ);
- PI != PE; ++PI) {
- BasicBlock *P = *PI;
- if (BBPreds.count(P))
- CommonPreds.insert(P);
- }
+ SmallPtrSet<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
- // Shortcut, if there are no common predecessors, merging is always safe
- if (CommonPreds.empty())
- return true;
-
// Look at all the phi nodes in Succ, to see if they present a conflict when
// merging these blocks
for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
@@ -520,28 +506,28 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
// merge the phi nodes and then the blocks can still be merged
PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB));
if (BBPN && BBPN->getParent() == BB) {
- for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
- PI != PE; PI++) {
- if (BBPN->getIncomingValueForBlock(*PI)
- != PN->getIncomingValueForBlock(*PI)) {
+ for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) {
+ BasicBlock *IBB = PN->getIncomingBlock(PI);
+ if (BBPreds.count(IBB) &&
+ BBPN->getIncomingValueForBlock(IBB) != PN->getIncomingValue(PI)) {
DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
<< Succ->getName() << " is conflicting with "
<< BBPN->getName() << " with regard to common predecessor "
- << (*PI)->getName() << "\n");
+ << IBB->getName() << "\n");
return false;
}
}
} else {
Value* Val = PN->getIncomingValueForBlock(BB);
- for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
- PI != PE; PI++) {
+ for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) {
// See if the incoming value for the common predecessor is equal to the
// one for BB, in which case this phi node will not prevent the merging
// of the block.
- if (Val != PN->getIncomingValueForBlock(*PI)) {
+ BasicBlock *IBB = PN->getIncomingBlock(PI);
+ if (BBPreds.count(IBB) && Val != PN->getIncomingValue(PI)) {
DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
<< Succ->getName() << " is conflicting with regard to common "
- << "predecessor " << (*PI)->getName() << "\n");
+ << "predecessor " << IBB->getName() << "\n");
return false;
}
}
@@ -748,6 +734,10 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align,
// If there is a large requested alignment and we can, bump up the alignment
// of the global.
if (GV->isDeclaration()) return Align;
+ // If the memory we set aside for the global may not be the memory used by
+ // the final program then it is impossible for us to reliably enforce the
+ // preferred alignment.
+ if (GV->isWeakForLinker()) return Align;
if (GV->getAlignment() >= PrefAlign)
return GV->getAlignment();
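A simplified sketch (plain C++, not the LLVM API) of the rewritten CanPropagatePredecessorsForPHIs check above: rather than first materialising a set of common predecessors, it walks the PHI's incoming (block, value) pairs directly and reports a conflict only for incoming blocks that are also predecessors of BB. All names below are illustrative.

#include <cstddef>
#include <set>
#include <utility>
#include <vector>

typedef int BlockId;
typedef int ValueId;

// Incoming holds a PHI's (incoming block, incoming value) pairs; ValueFromBB
// is the value the PHI would receive from BB after the merge.
static bool phiConflictsOnCommonPred(
    const std::vector<std::pair<BlockId, ValueId> > &Incoming,
    const std::set<BlockId> &BBPreds, ValueId ValueFromBB) {
  for (std::size_t i = 0; i != Incoming.size(); ++i)
    if (BBPreds.count(Incoming[i].first) &&
        Incoming[i].second != ValueFromBB)
      return true;  // A common predecessor disagrees with BB's value.
  return false;
}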
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index cbd54a8..4376265 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -99,7 +99,8 @@ namespace {
bool ProcessLoop(Loop *L, LPPassManager &LPM);
BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
BasicBlock *InsertPreheaderForLoop(Loop *L);
- Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM);
+ Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM,
+ BasicBlock *Preheader);
BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
void PlaceSplitBlockCarefully(BasicBlock *NewBB,
SmallVectorImpl<BasicBlock*> &SplitPreds,
@@ -240,7 +241,7 @@ ReprocessLoop:
// this for loops with a giant number of backedges, just factor them into a
// common backedge instead.
if (L->getNumBackEdges() < 8) {
- if (SeparateNestedLoop(L, LPM)) {
+ if (SeparateNestedLoop(L, LPM, Preheader)) {
++NumNested;
// This is a big restructuring change, reprocess the whole loop.
Changed = true;
@@ -265,7 +266,7 @@ ReprocessLoop:
PHINode *PN;
for (BasicBlock::iterator I = L->getHeader()->begin();
(PN = dyn_cast<PHINode>(I++)); )
- if (Value *V = SimplifyInstruction(PN, 0, DT)) {
+ if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
if (AA) AA->deleteValue(PN);
if (SE) SE->forgetValue(PN);
PN->replaceAllUsesWith(V);
@@ -379,19 +380,27 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
}
// Split out the loop pre-header.
- BasicBlock *NewBB =
- SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(),
- ".preheader", this);
+ BasicBlock *PreheaderBB;
+ if (!Header->isLandingPad()) {
+ PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader",
+ this);
+ } else {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader",
+ ".split-lp", this, NewBBs);
+ PreheaderBB = NewBBs[0];
+ }
- NewBB->getTerminator()->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc());
- DEBUG(dbgs() << "LoopSimplify: Creating pre-header " << NewBB->getName()
- << "\n");
+ PreheaderBB->getTerminator()->setDebugLoc(
+ Header->getFirstNonPHI()->getDebugLoc());
+ DEBUG(dbgs() << "LoopSimplify: Creating pre-header "
+ << PreheaderBB->getName() << "\n");
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
- PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L);
+ PlaceSplitBlockCarefully(PreheaderBB, OutsideBlocks, L);
- return NewBB;
+ return PreheaderBB;
}
/// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit
@@ -420,9 +429,7 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
this, NewBBs);
NewExitBB = NewBBs[0];
} else {
- NewExitBB = SplitBlockPredecessors(Exit, &LoopBlocks[0],
- LoopBlocks.size(), ".loopexit",
- this);
+ NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", this);
}
DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
@@ -456,7 +463,7 @@ static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I);
++I;
- if (Value *V = SimplifyInstruction(PN, 0, DT)) {
+ if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
// This is a degenerate PHI already, don't modify it!
PN->replaceAllUsesWith(V);
if (AA) AA->deleteValue(PN);
@@ -529,7 +536,17 @@ void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,
/// If we are able to separate out a loop, return the new outer loop that was
/// created.
///
-Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
+Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM,
+ BasicBlock *Preheader) {
+ // Don't try to separate loops without a preheader (this excludes
+ // loop headers which are targeted by an indirectbr).
+ if (!Preheader)
+ return 0;
+
+ // The header is not a landing pad; preheader insertion should ensure this.
+ assert(!L->getHeader()->isLandingPad() &&
+ "Can't insert backedge to landing pad");
+
PHINode *PN = FindPHIToPartitionLoops(L, DT, AA, LI);
if (PN == 0) return 0; // No known way to partition.
@@ -539,13 +556,8 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
SmallVector<BasicBlock*, 8> OuterLoopPreds;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) != PN ||
- !L->contains(PN->getIncomingBlock(i))) {
- // We can't split indirectbr edges.
- if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator()))
- return 0;
-
+ !L->contains(PN->getIncomingBlock(i)))
OuterLoopPreds.push_back(PN->getIncomingBlock(i));
- }
DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
@@ -556,9 +568,8 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
SE->forgetLoop(L);
BasicBlock *Header = L->getHeader();
- BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0],
- OuterLoopPreds.size(),
- ".outer", this);
+ BasicBlock *NewBB =
+ SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", this);
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
@@ -640,6 +651,9 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
if (!Preheader)
return 0;
+ // The header is not a landing pad; preheader insertion should ensure this.
+ assert(!Header->isLandingPad() && "Can't insert backedge to landing pad");
+
// Figure out which basic blocks contain back-edges to the loop header.
std::vector<BasicBlock*> BackedgeBlocks;
for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 62e4fa2..b96f14b 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -135,7 +135,8 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI,
/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
/// available it must also preserve those analyses.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
- unsigned TripMultiple, LoopInfo *LI, LPPassManager *LPM) {
+ bool AllowRuntime, unsigned TripMultiple,
+ LoopInfo *LI, LPPassManager *LPM) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
@@ -165,12 +166,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
return false;
}
- // Notify ScalarEvolution that the loop will be substantially changed,
- // if not outright eliminated.
- ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
- if (SE)
- SE->forgetLoop(L);
-
if (TripCount != 0)
DEBUG(dbgs() << " Trip Count = " << TripCount << "\n");
if (TripMultiple != 1)
@@ -188,6 +183,20 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// Are we eliminating the loop control altogether?
bool CompletelyUnroll = Count == TripCount;
+ // We assume a run-time trip count if the compiler cannot
+ // figure out the loop trip count and the unroll-runtime
+ // flag is specified.
+ bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
+
+ if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM))
+ return false;
+
+ // Notify ScalarEvolution that the loop will be substantially changed,
+ // if not outright eliminated.
+ ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+ if (SE)
+ SE->forgetLoop(L);
+
// If we know the trip count, we know the multiple...
unsigned BreakoutTrip = 0;
if (TripCount != 0) {
@@ -209,6 +218,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
} else if (TripMultiple != 1) {
DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
+ } else if (RuntimeTripCount) {
+ DEBUG(dbgs() << " with run-time trip count");
}
DEBUG(dbgs() << "!\n");
}
@@ -332,6 +343,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
BasicBlock *Dest = Headers[j];
bool NeedConditional = true;
+ if (RuntimeTripCount && j != 0) {
+ NeedConditional = false;
+ }
+
// For a complete unroll, make the last iteration end with a branch
// to the exit block.
if (CompletelyUnroll && j == 0) {
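A hedged usage sketch of the widened UnrollLoop signature above; the Loop, LoopInfo, and LPPassManager are assumed to come from the pass that owns them, and the unroll count of 4 is only an example. Passing TripCount == 0 together with AllowRuntime == true requests the run-time prolog path implemented in the new LoopUnrollRuntime.cpp below.

#include "llvm/Transforms/Utils/UnrollLoop.h"

static bool unrollWithRuntimeFallback(llvm::Loop *L, llvm::LoopInfo *LI,
                                      llvm::LPPassManager *LPM) {
  // TripCount == 0 signals that the compile-time trip count is unknown.
  return llvm::UnrollLoop(L, /*Count=*/4, /*TripCount=*/0,
                          /*AllowRuntime=*/true, /*TripMultiple=*/1, LI, LPM);
}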
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
new file mode 100644
index 0000000..b351852
--- /dev/null
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -0,0 +1,374 @@
+//===-- LoopUnrollRuntime.cpp - Runtime Loop unrolling utilities ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some loop unrolling utilities for loops with run-time
+// trip counts. See LoopUnroll.cpp for unrolling loops with compile-time
+// trip counts.
+//
+// The functions in this file are used to generate extra code when the
+// run-time trip count modulo the unroll factor is not 0. When this is the
+// case, we need to generate code to execute these 'left over' iterations.
+//
+// The current strategy generates an if-then-else sequence prior to the
+// unrolled loop to execute the 'left over' iterations. Other strategies
+// include generating a loop before or after the unrolled loop.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unroll"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include <algorithm>
+
+using namespace llvm;
+
+STATISTIC(NumRuntimeUnrolled,
+ "Number of loops unrolled with run-time trip counts");
+
+/// Connect the unrolling prolog code to the original loop.
+/// The unrolling prolog code contains code to execute the
+/// 'extra' iterations if the run-time trip count modulo the
+/// unroll count is non-zero.
+///
+/// This function performs the following:
+/// - Create PHI nodes at prolog end block to combine values
+/// that exit the prolog code and jump around the prolog.
+/// - Add a PHI operand to a PHI node at the loop exit block
+/// for values that exit the prolog and go around the loop.
+/// - Branch around the original loop if the trip count is less
+/// than the unroll factor.
+///
+static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
+ BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
+ BasicBlock *OrigPH, BasicBlock *NewPH,
+ ValueToValueMapTy &LVMap, Pass *P) {
+ BasicBlock *Latch = L->getLoopLatch();
+ assert(Latch != 0 && "Loop must have a latch");
+
+ // Create a PHI node for each outgoing value from the original loop
+ // (which means it is an outgoing value from the prolog code too).
+ // The new PHI node is inserted in the prolog end basic block.
+ // The new PHI node is added as an operand of a PHI node in either
+ // the loop header or the loop exit block.
+ for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch);
+ SBI != SBE; ++SBI) {
+ for (BasicBlock::iterator BBI = (*SBI)->begin();
+ PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
+
+ // Add a new PHI node to the prolog end block and add the
+ // appropriate incoming values.
+ PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr",
+ PrologEnd->getTerminator());
+ // Adding a value to the new PHI node from the original loop preheader.
+ // This is the value that skips all the prolog code.
+ if (L->contains(PN)) {
+ NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH);
+ } else {
+ NewPN->addIncoming(Constant::getNullValue(PN->getType()), OrigPH);
+ }
+ Value *OrigVal = PN->getIncomingValueForBlock(Latch);
+ Value *V = OrigVal;
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (L->contains(I)) {
+ V = LVMap[I];
+ }
+ }
+ // Adding a value to the new PHI node from the last prolog block
+ // that was created.
+ NewPN->addIncoming(V, LastPrologBB);
+
+ // Update the existing PHI node operand with the value from the
+ // new PHI node. How this is done depends on if the existing
+ // PHI node is in the original loop block, or the exit block.
+ if (L->contains(PN)) {
+ PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN);
+ } else {
+ PN->addIncoming(NewPN, PrologEnd);
+ }
+ }
+ }
+
+ // Create a branch around the original loop, which is taken if the
+ // trip count is less than the unroll factor.
+ Instruction *InsertPt = PrologEnd->getTerminator();
+ Instruction *BrLoopExit =
+ new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount,
+ ConstantInt::get(TripCount->getType(), Count));
+ BasicBlock *Exit = L->getUniqueExitBlock();
+ assert(Exit != 0 && "Loop must have a single exit block only");
+ // Split the exit to maintain loop canonicalization guarantees
+ SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));
+ if (!Exit->isLandingPad()) {
+ SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", P);
+ } else {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(Exit, Preds, ".unr1-lcssa", ".unr2-lcssa",
+ P, NewBBs);
+ }
+ // Add the branch to the exit block (around the unrolled loop)
+ BranchInst::Create(Exit, NewPH, BrLoopExit, InsertPt);
+ InsertPt->eraseFromParent();
+}
+
+/// Create a clone of the blocks in a loop and connect them together.
+/// This function doesn't create a clone of the loop structure.
+///
+/// There are two value maps that are defined and used. VMap is
+/// for the values in the current loop instance. LVMap contains
+/// the values from the last loop instance. We need the LVMap values
+/// to update the initial values for the current loop instance.
+///
+static void CloneLoopBlocks(Loop *L,
+ bool FirstCopy,
+ BasicBlock *InsertTop,
+ BasicBlock *InsertBot,
+ std::vector<BasicBlock *> &NewBlocks,
+ LoopBlocksDFS &LoopBlocks,
+ ValueToValueMapTy &VMap,
+ ValueToValueMapTy &LVMap,
+ LoopInfo *LI) {
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+ Function *F = Header->getParent();
+ LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
+ LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
+ // For each block in the original loop, create a new copy,
+ // and update the value map with the newly created values.
+ for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
+ BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".unr", F);
+ NewBlocks.push_back(NewBB);
+
+ if (Loop *ParentLoop = L->getParentLoop())
+ ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+
+ VMap[*BB] = NewBB;
+ if (Header == *BB) {
+ // For the first block, add a CFG connection to this newly
+ // created block
+ InsertTop->getTerminator()->setSuccessor(0, NewBB);
+
+ // Change the incoming values to the ones defined in the
+ // previously cloned loop.
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *NewPHI = cast<PHINode>(VMap[I]);
+ if (FirstCopy) {
+ // We replace the first phi node with the value from the preheader
+ VMap[I] = NewPHI->getIncomingValueForBlock(Preheader);
+ NewBB->getInstList().erase(NewPHI);
+ } else {
+ // Update VMap with values from the previous block
+ unsigned idx = NewPHI->getBasicBlockIndex(Latch);
+ Value *InVal = NewPHI->getIncomingValue(idx);
+ if (Instruction *I = dyn_cast<Instruction>(InVal))
+ if (L->contains(I))
+ InVal = LVMap[InVal];
+ NewPHI->setIncomingValue(idx, InVal);
+ NewPHI->setIncomingBlock(idx, InsertTop);
+ }
+ }
+ }
+
+ if (Latch == *BB) {
+ VMap.erase((*BB)->getTerminator());
+ NewBB->getTerminator()->eraseFromParent();
+ BranchInst::Create(InsertBot, NewBB);
+ }
+ }
+ // LastValueMap is updated with the values for the current loop
+ // which are used the next time this function is called.
+ for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
+ VI != VE; ++VI) {
+ LVMap[VI->first] = VI->second;
+ }
+}
+
+/// Insert code in the prolog code when unrolling a loop with a
+/// run-time trip-count.
+///
+/// This method assumes that the loop unroll factor is the total number
+/// of loop bodies in the loop after unrolling. (Some folks refer
+/// to the unroll factor as the number of *extra* copies added).
+/// We assume also that the loop unroll factor is a power-of-two. So, after
+/// unrolling the loop, the number of loop bodies executed is 2,
+/// 4, 8, etc. Note - LLVM converts the if-then-else sequence to a switch
+/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
+/// the switch instruction is generated.
+///
+/// extraiters = tripcount % loopfactor
+/// if (extraiters == 0) jump Loop:
+/// if (extraiters == loopfactor) jump L1
+/// if (extraiters == loopfactor-1) jump L2
+/// ...
+/// L1: LoopBody;
+/// L2: LoopBody;
+/// ...
+/// if tripcount < loopfactor jump End
+/// Loop:
+/// ...
+/// End:
+///
+bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
+ LPPassManager *LPM) {
+ // for now, only unroll loops that contain a single exit
+ SmallVector<BasicBlock*, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ if (ExitingBlocks.size() > 1)
+ return false;
+
+ // Make sure the loop is in canonical form, and there is a single
+ // exit block only.
+ if (!L->isLoopSimplifyForm() || L->getUniqueExitBlock() == 0)
+ return false;
+
+ // Use Scalar Evolution to compute the trip count. This allows more
+ // loops to be unrolled than relying on induction var simplification
+ ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+ if (SE == 0)
+ return false;
+
+ // Only unroll loops with a computable trip count, and the trip count must
+ // be an integer value (allowing a pointer type is a TODO item).
+ const SCEV *BECount = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy())
+ return false;
+
+ // Add 1 since the backedge count doesn't include the first loop iteration
+ const SCEV *TripCountSC =
+ SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1));
+ if (isa<SCEVCouldNotCompute>(TripCountSC))
+ return false;
+
+ // We only handle cases when the unroll factor is a power of 2.
+ // Count is the loop unroll factor, the number of extra copies added + 1.
+ if ((Count & (Count-1)) != 0)
+ return false;
+
+ // If this loop is nested, then the loop unroller changes the code in the
+ // parent loop, so the Scalar Evolution pass needs to be run again.
+ if (Loop *ParentLoop = L->getParentLoop())
+ SE->forgetLoop(ParentLoop);
+
+ BasicBlock *PH = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+ // It helps to split the original preheader twice: once for the end of the
+ // prolog code and once for a new loop preheader.
+ BasicBlock *PEnd = SplitEdge(PH, Header, LPM->getAsPass());
+ BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), LPM->getAsPass());
+ BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
+
+ // Compute the number of extra iterations required, which is:
+ // extra iterations = run-time trip count % loop unroll factor
+ SCEVExpander Expander(*SE, "loop-unroll");
+ Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
+ PreHeaderBR);
+ Type *CountTy = TripCount->getType();
+ BinaryOperator *ModVal =
+ BinaryOperator::CreateURem(TripCount,
+ ConstantInt::get(CountTy, Count),
+ "xtraiter");
+ ModVal->insertBefore(PreHeaderBR);
+
+ // If there are no extra iterations, jump to the unrolled loop.
+ Value *BranchVal = new ICmpInst(PreHeaderBR,
+ ICmpInst::ICMP_NE, ModVal,
+ ConstantInt::get(CountTy, 0), "lcmp");
+ // Branch to either the extra iterations or the unrolled loop
+ // We will fix up the true branch label when adding loop body copies
+ BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR);
+ assert(PreHeaderBR->isUnconditional() &&
+ PreHeaderBR->getSuccessor(0) == PEnd &&
+ "CFG edges in Preheader are not correct");
+ PreHeaderBR->eraseFromParent();
+
+ ValueToValueMapTy LVMap;
+ Function *F = Header->getParent();
+ // These variables are used to update the CFG links in each iteration
+ BasicBlock *CompareBB = 0;
+ BasicBlock *LastLoopBB = PH;
+ // Get an ordered list of blocks in the loop to help with the ordering of the
+ // cloned blocks in the prolog code
+ LoopBlocksDFS LoopBlocks(L);
+ LoopBlocks.perform(LI);
+
+ //
+ // For each extra loop iteration, create a copy of the loop's basic blocks
+ // and generate a condition that branches to the copy depending on the
+ // number of 'left over' iterations.
+ //
+ for (unsigned leftOverIters = Count-1; leftOverIters > 0; --leftOverIters) {
+ std::vector<BasicBlock*> NewBlocks;
+ ValueToValueMapTy VMap;
+
+ // Clone all the basic blocks in the loop; the loop structure itself is
+ // not cloned. This function adds the appropriate CFG connections.
+ CloneLoopBlocks(L, (leftOverIters == Count-1), LastLoopBB, PEnd, NewBlocks,
+ LoopBlocks, VMap, LVMap, LI);
+ LastLoopBB = cast<BasicBlock>(VMap[Latch]);
+
+ // Insert the cloned blocks into the function just before the original loop.
+ F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(),
+ NewBlocks[0], F->end());
+
+ // Generate the code for the comparison which determines if the loop
+ // prolog code needs to be executed.
+ if (leftOverIters == Count-1) {
+ // There is no compare block for the fall-through case of the last
+ // left-over iteration.
+ CompareBB = NewBlocks[0];
+ } else {
+ // Create a new block for the comparison
+ BasicBlock *NewBB = BasicBlock::Create(CompareBB->getContext(), "unr.cmp",
+ F, CompareBB);
+ if (Loop *ParentLoop = L->getParentLoop()) {
+ // Add the new block to the parent loop, if needed
+ ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ }
+
+ // Build the comparison against the extra-iteration value and the branch.
+ Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal,
+ ConstantInt::get(CountTy, leftOverIters),
+ "un.tmp");
+ // Branch to either the extra iterations or the unrolled loop
+ BranchInst::Create(NewBlocks[0], CompareBB,
+ BranchVal, NewBB);
+ CompareBB = NewBB;
+ PH->getTerminator()->setSuccessor(0, NewBB);
+ VMap[NewPH] = CompareBB;
+ }
+
+ // Rewrite the cloned instructions' operands to use the values
+ // created when the blocks were cloned.
+ for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {
+ for (BasicBlock::iterator I = NewBlocks[i]->begin(),
+ E = NewBlocks[i]->end(); I != E; ++I) {
+ RemapInstruction(I, VMap,
+ RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
+ }
+ }
+ }
+
+ // Connect the prolog code to the original loop and update the
+ // PHI nodes.
+ ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, LVMap,
+ LPM->getAsPass());
+ NumRuntimeUnrolled++;
+ return true;
+}
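A plain C++ illustration (not LLVM IR) of the prolog strategy this new file implements, assuming an unroll factor of 4 and a loop body that increments a[i]; the names are invented. The switch stands in for the per-left-over-iteration compare-and-branch chain, which SimplifyCFG later converts into a switch as the comments above note.

static void incrementAllUnrolledBy4(int *a, int n) {
  int extra = n % 4;       // "xtraiter": number of left-over iterations
  int i = 0;
  switch (extra) {         // Prolog: run the left-over iterations once.
  case 3: a[i] += 1; ++i;  // fall through
  case 2: a[i] += 1; ++i;  // fall through
  case 1: a[i] += 1; ++i;  // fall through
  default: break;
  }
  if (n < 4)
    return;                // Branch around the unrolled loop.
  for (; i < n; i += 4) {  // Unrolled body: a multiple of 4 iterations remain.
    a[i] += 1; a[i + 1] += 1; a[i + 2] += 1; a[i + 3] += 1;
  }
}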
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index 5e294a3..8491c55 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -19,7 +19,8 @@
using namespace llvm;
-void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
+static void appendToGlobalArray(const char *Array,
+ Module &M, Function *F, int Priority) {
IRBuilder<> IRB(M.getContext());
FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
StructType *Ty = StructType::get(
@@ -31,7 +32,7 @@ void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
// Get the current set of static global constructors and add the new ctor
// to the list.
SmallVector<Constant *, 16> CurrentCtors;
- if (GlobalVariable * GVCtor = M.getNamedGlobal("llvm.global_ctors")) {
+ if (GlobalVariable * GVCtor = M.getNamedGlobal(Array)) {
if (Constant *Init = GVCtor->getInitializer()) {
unsigned n = Init->getNumOperands();
CurrentCtors.reserve(n + 1);
@@ -51,6 +52,13 @@ void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
// Create the new global variable and replace all uses of
// the old global variable with the new one.
(void)new GlobalVariable(M, NewInit->getType(), false,
- GlobalValue::AppendingLinkage, NewInit,
- "llvm.global_ctors");
+ GlobalValue::AppendingLinkage, NewInit, Array);
+}
+
+void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
+ appendToGlobalArray("llvm.global_ctors", M, F, Priority);
+}
+
+void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority) {
+ appendToGlobalArray("llvm.global_dtors", M, F, Priority);
}
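A brief usage sketch of the new helper; the wrapper name and its arguments are assumptions for illustration. Destructors registered this way land in the appending-linkage llvm.global_dtors array, mirroring what appendToGlobalCtors does for llvm.global_ctors.

#include "llvm/Module.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

static void registerModuleCleanup(llvm::Module &M, llvm::Function *Dtor) {
  // The priority is stored alongside the function in the array entry.
  llvm::appendToGlobalDtors(M, Dtor, /*Priority=*/65535);
}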
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index db3e942..e8f4285 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -590,7 +590,7 @@ void PromoteMem2Reg::run() {
PHINode *PN = I->second;
// If this PHI node merges one value and/or undefs, get the value.
- if (Value *V = SimplifyInstruction(PN, 0, &DT)) {
+ if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) {
if (AST && PN->getType()->isPointerTy())
AST->deleteValue(PN);
PN->replaceAllUsesWith(V);
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index b8c3ab4..bf2cb49 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -257,7 +257,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// Okay, it looks like the instruction IS in the "condition". Check to
// see if it's a cheap instruction to unconditionally compute, and if it
// only uses stuff defined outside of the condition. If so, hoist it out.
- if (!I->isSafeToSpeculativelyExecute())
+ if (!isSafeToSpeculativelyExecute(I))
return false;
unsigned Cost = 0;
@@ -1487,7 +1487,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
Instruction *BonusInst = 0;
if (&*FrontIt != Cond &&
FrontIt->hasOneUse() && *FrontIt->use_begin() == Cond &&
- FrontIt->isSafeToSpeculativelyExecute()) {
+ isSafeToSpeculativelyExecute(FrontIt)) {
BonusInst = &*FrontIt;
++FrontIt;
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index ac005f9..81eb9e0 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -24,6 +24,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -39,12 +40,14 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfo>();
}
/// runOnFunction - Remove instructions that simplify.
bool runOnFunction(Function &F) {
const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
bool Changed = false;
@@ -60,7 +63,7 @@ namespace {
continue;
// Don't waste time simplifying unused instructions.
if (!I->use_empty())
- if (Value *V = SimplifyInstruction(I, TD, DT)) {
+ if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) {
// Mark all uses for resimplification next time round the loop.
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
UI != UE; ++UI)
@@ -84,8 +87,11 @@ namespace {
}
char InstSimplifier::ID = 0;
-INITIALIZE_PASS(InstSimplifier, "instsimplify", "Remove redundant instructions",
- false, false)
+INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify",
+ "Remove redundant instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(InstSimplifier, "instsimplify",
+ "Remove redundant instructions", false, false)
char &llvm::InstructionSimplifierID = InstSimplifier::ID;
// Public interface to the simplify instructions pass.
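For context, SimplifyInstruction now takes the TargetLibraryInfo between the TargetData and the DominatorTree, which is why the call sites touched elsewhere in this patch (LoopSimplify, PromoteMemoryToRegister) pass an extra 0. Below is a small hedged wrapper showing the new call shape; the wrapper name is invented.

#include "llvm/Analysis/InstructionSimplify.h"

static llvm::Value *trySimplify(llvm::Instruction *I,
                                const llvm::TargetData *TD,
                                const llvm::TargetLibraryInfo *TLI,
                                const llvm::DominatorTree *DT) {
  // Any of TD, TLI and DT may be 0 when the caller has no such analysis.
  return llvm::SimplifyInstruction(I, TD, TLI, DT);
}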
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index d7863f5..4fb5fd3 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -2110,3 +2110,6 @@ void Type::dump() const { print(dbgs()); }
// Module::dump() - Allow printing of Modules from the debugger.
void Module::dump() const { print(dbgs(), 0); }
+
+// NamedMDNode::dump() - Allow printing of NamedMDNodes from the debugger.
+void NamedMDNode::dump() const { print(dbgs(), 0); }
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index b849d3e..59424f9 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -38,105 +38,21 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
return false;
Name = Name.substr(5); // Strip off "llvm."
- FunctionType *FTy = F->getFunctionType();
- Module *M = F->getParent();
-
switch (Name[0]) {
default: break;
- case 'a':
- if (Name.startswith("atomic.cmp.swap") ||
- Name.startswith("atomic.swap") ||
- Name.startswith("atomic.load.add") ||
- Name.startswith("atomic.load.sub") ||
- Name.startswith("atomic.load.and") ||
- Name.startswith("atomic.load.nand") ||
- Name.startswith("atomic.load.or") ||
- Name.startswith("atomic.load.xor") ||
- Name.startswith("atomic.load.max") ||
- Name.startswith("atomic.load.min") ||
- Name.startswith("atomic.load.umax") ||
- Name.startswith("atomic.load.umin"))
- return true;
- case 'i':
- // This upgrades the old llvm.init.trampoline to the new
- // llvm.init.trampoline and llvm.adjust.trampoline pair.
- if (Name == "init.trampoline") {
- // The new llvm.init.trampoline returns nothing.
- if (FTy->getReturnType()->isVoidTy())
- break;
-
- assert(FTy->getNumParams() == 3 && "old init.trampoline takes 3 args!");
-
- // Change the name of the old intrinsic so that we can play with its type.
- std::string NameTmp = F->getName();
- F->setName("");
- NewFn = cast<Function>(M->getOrInsertFunction(
- NameTmp,
- Type::getVoidTy(M->getContext()),
- FTy->getParamType(0), FTy->getParamType(1),
- FTy->getParamType(2), (Type *)0));
+ case 'c': {
+ if (Name.startswith("ctlz.") && F->arg_size() == 1) {
+ F->setName(Name + ".old");
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
+ F->arg_begin()->getType());
return true;
}
- case 'm':
- if (Name == "memory.barrier")
- return true;
- case 'p':
- // This upgrades the llvm.prefetch intrinsic to accept one more parameter,
- // which is a instruction / data cache identifier. The old version only
- // implicitly accepted the data version.
- if (Name == "prefetch") {
- // Don't do anything if it has the correct number of arguments already
- if (FTy->getNumParams() == 4)
- break;
-
- assert(FTy->getNumParams() == 3 && "old prefetch takes 3 args!");
- // We first need to change the name of the old (bad) intrinsic, because
- // its type is incorrect, but we cannot overload that name. We
- // arbitrarily unique it here allowing us to construct a correctly named
- // and typed function below.
- std::string NameTmp = F->getName();
- F->setName("");
- NewFn = cast<Function>(M->getOrInsertFunction(NameTmp,
- FTy->getReturnType(),
- FTy->getParamType(0),
- FTy->getParamType(1),
- FTy->getParamType(2),
- FTy->getParamType(2),
- (Type*)0));
+ if (Name.startswith("cttz.") && F->arg_size() == 1) {
+ F->setName(Name + ".old");
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
+ F->arg_begin()->getType());
return true;
}
-
- break;
- case 'x': {
- const char *NewFnName = NULL;
- // This fixes the poorly named crc32 intrinsics.
- if (Name == "x86.sse42.crc32.8")
- NewFnName = "llvm.x86.sse42.crc32.32.8";
- else if (Name == "x86.sse42.crc32.16")
- NewFnName = "llvm.x86.sse42.crc32.32.16";
- else if (Name == "x86.sse42.crc32.32")
- NewFnName = "llvm.x86.sse42.crc32.32.32";
- else if (Name == "x86.sse42.crc64.8")
- NewFnName = "llvm.x86.sse42.crc32.64.8";
- else if (Name == "x86.sse42.crc64.64")
- NewFnName = "llvm.x86.sse42.crc32.64.64";
-
- if (NewFnName) {
- F->setName(NewFnName);
- NewFn = F;
- return true;
- }
-
- // Calls to these instructions are transformed into unaligned loads.
- if (Name == "x86.sse.loadu.ps" || Name == "x86.sse2.loadu.dq" ||
- Name == "x86.sse2.loadu.pd")
- return true;
-
- // Calls to these instructions are transformed into nontemporal stores.
- if (Name == "x86.sse.movnt.ps" || Name == "x86.sse2.movnt.dq" ||
- Name == "x86.sse2.movnt.pd" || Name == "x86.sse2.movnt.i")
- return true;
-
break;
}
}
@@ -169,190 +85,27 @@ bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
// upgraded intrinsic. All argument and return casting must be provided in
// order to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
- Function *F = CI->getCalledFunction();
- LLVMContext &C = CI->getContext();
- ImmutableCallSite CS(CI);
-
- assert(F && "CallInst has no function associated with it.");
-
- if (!NewFn) {
- if (F->getName() == "llvm.x86.sse.loadu.ps" ||
- F->getName() == "llvm.x86.sse2.loadu.dq" ||
- F->getName() == "llvm.x86.sse2.loadu.pd") {
- // Convert to a native, unaligned load.
- Type *VecTy = CI->getType();
- Type *IntTy = IntegerType::get(C, 128);
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
-
- Value *BC = Builder.CreateBitCast(CI->getArgOperand(0),
- PointerType::getUnqual(IntTy),
- "cast");
- LoadInst *LI = Builder.CreateLoad(BC, CI->getName());
- LI->setAlignment(1); // Unaligned load.
- BC = Builder.CreateBitCast(LI, VecTy, "new.cast");
-
- // Fix up all the uses with our new load.
- if (!CI->use_empty())
- CI->replaceAllUsesWith(BC);
-
- // Remove intrinsic.
- CI->eraseFromParent();
- } else if (F->getName() == "llvm.x86.sse.movnt.ps" ||
- F->getName() == "llvm.x86.sse2.movnt.dq" ||
- F->getName() == "llvm.x86.sse2.movnt.pd" ||
- F->getName() == "llvm.x86.sse2.movnt.i") {
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
-
- Module *M = F->getParent();
- SmallVector<Value *, 1> Elts;
- Elts.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
- MDNode *Node = MDNode::get(C, Elts);
-
- Value *Arg0 = CI->getArgOperand(0);
- Value *Arg1 = CI->getArgOperand(1);
-
- // Convert the type of the pointer to a pointer to the stored type.
- Value *BC = Builder.CreateBitCast(Arg0,
- PointerType::getUnqual(Arg1->getType()),
- "cast");
- StoreInst *SI = Builder.CreateStore(Arg1, BC);
- SI->setMetadata(M->getMDKindID("nontemporal"), Node);
- SI->setAlignment(16);
-
- // Remove intrinsic.
- CI->eraseFromParent();
- } else if (F->getName().startswith("llvm.atomic.cmp.swap")) {
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
- Value *Val = Builder.CreateAtomicCmpXchg(CI->getArgOperand(0),
- CI->getArgOperand(1),
- CI->getArgOperand(2),
- Monotonic);
-
- // Replace intrinsic.
- Val->takeName(CI);
- if (!CI->use_empty())
- CI->replaceAllUsesWith(Val);
- CI->eraseFromParent();
- } else if (F->getName().startswith("llvm.atomic")) {
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
-
- AtomicRMWInst::BinOp Op;
- if (F->getName().startswith("llvm.atomic.swap"))
- Op = AtomicRMWInst::Xchg;
- else if (F->getName().startswith("llvm.atomic.load.add"))
- Op = AtomicRMWInst::Add;
- else if (F->getName().startswith("llvm.atomic.load.sub"))
- Op = AtomicRMWInst::Sub;
- else if (F->getName().startswith("llvm.atomic.load.and"))
- Op = AtomicRMWInst::And;
- else if (F->getName().startswith("llvm.atomic.load.nand"))
- Op = AtomicRMWInst::Nand;
- else if (F->getName().startswith("llvm.atomic.load.or"))
- Op = AtomicRMWInst::Or;
- else if (F->getName().startswith("llvm.atomic.load.xor"))
- Op = AtomicRMWInst::Xor;
- else if (F->getName().startswith("llvm.atomic.load.max"))
- Op = AtomicRMWInst::Max;
- else if (F->getName().startswith("llvm.atomic.load.min"))
- Op = AtomicRMWInst::Min;
- else if (F->getName().startswith("llvm.atomic.load.umax"))
- Op = AtomicRMWInst::UMax;
- else if (F->getName().startswith("llvm.atomic.load.umin"))
- Op = AtomicRMWInst::UMin;
- else
- llvm_unreachable("Unknown atomic");
-
- Value *Val = Builder.CreateAtomicRMW(Op, CI->getArgOperand(0),
- CI->getArgOperand(1),
- Monotonic);
-
- // Replace intrinsic.
- Val->takeName(CI);
- if (!CI->use_empty())
- CI->replaceAllUsesWith(Val);
- CI->eraseFromParent();
- } else if (F->getName() == "llvm.memory.barrier") {
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
-
- // Note that this conversion ignores the "device" bit; it was not really
- // well-defined, and got abused because nobody paid enough attention to
- // get it right. In practice, this probably doesn't matter; application
- // code generally doesn't need anything stronger than
- // SequentiallyConsistent (and realistically, SequentiallyConsistent
- // is lowered to a strong enough barrier for almost anything).
-
- if (cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue())
- Builder.CreateFence(SequentiallyConsistent);
- else if (!cast<ConstantInt>(CI->getArgOperand(0))->getZExtValue())
- Builder.CreateFence(Release);
- else if (!cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue())
- Builder.CreateFence(Acquire);
- else
- Builder.CreateFence(AcquireRelease);
+ assert(CI->getCalledFunction() && "Intrinsic call is not direct?");
+ if (!NewFn) return;
- // Remove intrinsic.
- CI->eraseFromParent();
- } else {
- llvm_unreachable("Unknown function for CallInst upgrade.");
- }
- return;
- }
+ LLVMContext &C = CI->getContext();
+ IRBuilder<> Builder(C);
+ Builder.SetInsertPoint(CI->getParent(), CI);
switch (NewFn->getIntrinsicID()) {
- case Intrinsic::prefetch: {
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
- llvm::Type *I32Ty = llvm::Type::getInt32Ty(CI->getContext());
-
- // Add the extra "data cache" argument
- Value *Operands[4] = { CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2),
- llvm::ConstantInt::get(I32Ty, 1) };
- CallInst *NewCI = CallInst::Create(NewFn, Operands,
- CI->getName(), CI);
- NewCI->setTailCall(CI->isTailCall());
- NewCI->setCallingConv(CI->getCallingConv());
- // Handle any uses of the old CallInst.
- if (!CI->use_empty())
- // Replace all uses of the old call with the new cast which has the
- // correct type.
- CI->replaceAllUsesWith(NewCI);
-
- // Clean up the old call now that it has been completely upgraded.
- CI->eraseFromParent();
- break;
- }
- case Intrinsic::init_trampoline: {
-
- // Transform
- // %tramp = call i8* llvm.init.trampoline (i8* x, i8* y, i8* z)
- // to
- // call void llvm.init.trampoline (i8* %x, i8* %y, i8* %z)
- // %tramp = call i8* llvm.adjust.trampoline (i8* %x)
-
- Function *AdjustTrampolineFn =
- cast<Function>(Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::adjust_trampoline));
-
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI);
-
- Builder.CreateCall3(NewFn, CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2));
-
- CallInst *AdjustCall = Builder.CreateCall(AdjustTrampolineFn,
- CI->getArgOperand(0),
- CI->getName());
- if (!CI->use_empty())
- CI->replaceAllUsesWith(AdjustCall);
+ default:
+ llvm_unreachable("Unknown function for CallInst upgrade.");
+
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ assert(CI->getNumArgOperands() == 1 &&
+ "Mismatch between function args and call args");
+ StringRef Name = CI->getName();
+ CI->setName(Name + ".old");
+ CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
+ Builder.getFalse(), Name));
CI->eraseFromParent();
- break;
- }
+ return;
}
}
@@ -378,291 +131,3 @@ void llvm::UpgradeCallsToIntrinsic(Function* F) {
}
}
-/// This function strips all debug info intrinsics, except for llvm.dbg.declare.
-/// If an llvm.dbg.declare intrinsic is invalid, then this function simply
-/// strips that use.
-void llvm::CheckDebugInfoIntrinsics(Module *M) {
- if (Function *FuncStart = M->getFunction("llvm.dbg.func.start")) {
- while (!FuncStart->use_empty())
- cast<CallInst>(FuncStart->use_back())->eraseFromParent();
- FuncStart->eraseFromParent();
- }
-
- if (Function *StopPoint = M->getFunction("llvm.dbg.stoppoint")) {
- while (!StopPoint->use_empty())
- cast<CallInst>(StopPoint->use_back())->eraseFromParent();
- StopPoint->eraseFromParent();
- }
-
- if (Function *RegionStart = M->getFunction("llvm.dbg.region.start")) {
- while (!RegionStart->use_empty())
- cast<CallInst>(RegionStart->use_back())->eraseFromParent();
- RegionStart->eraseFromParent();
- }
-
- if (Function *RegionEnd = M->getFunction("llvm.dbg.region.end")) {
- while (!RegionEnd->use_empty())
- cast<CallInst>(RegionEnd->use_back())->eraseFromParent();
- RegionEnd->eraseFromParent();
- }
-
- if (Function *Declare = M->getFunction("llvm.dbg.declare")) {
- if (!Declare->use_empty()) {
- DbgDeclareInst *DDI = cast<DbgDeclareInst>(Declare->use_back());
- if (!isa<MDNode>(DDI->getArgOperand(0)) ||
- !isa<MDNode>(DDI->getArgOperand(1))) {
- while (!Declare->use_empty()) {
- CallInst *CI = cast<CallInst>(Declare->use_back());
- CI->eraseFromParent();
- }
- Declare->eraseFromParent();
- }
- }
- }
-}
-
-/// FindExnAndSelIntrinsics - Find the eh_exception and eh_selector intrinsic
-/// calls reachable from the unwind basic block.
-static void FindExnAndSelIntrinsics(BasicBlock *BB, CallInst *&Exn,
- CallInst *&Sel,
- SmallPtrSet<BasicBlock*, 8> &Visited) {
- if (!Visited.insert(BB)) return;
-
- for (BasicBlock::iterator
- I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- switch (CI->getCalledFunction()->getIntrinsicID()) {
- default: break;
- case Intrinsic::eh_exception:
- assert(!Exn && "Found more than one eh.exception call!");
- Exn = CI;
- break;
- case Intrinsic::eh_selector:
- assert(!Sel && "Found more than one eh.selector call!");
- Sel = CI;
- break;
- }
-
- if (Exn && Sel) return;
- }
- }
-
- if (Exn && Sel) return;
-
- for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
- FindExnAndSelIntrinsics(*I, Exn, Sel, Visited);
- if (Exn && Sel) return;
- }
-}
-
-/// TransferClausesToLandingPadInst - Transfer the exception handling clauses
-/// from the eh_selector call to the new landingpad instruction.
-static void TransferClausesToLandingPadInst(LandingPadInst *LPI,
- CallInst *EHSel) {
- LLVMContext &Context = LPI->getContext();
- unsigned N = EHSel->getNumArgOperands();
-
- for (unsigned i = N - 1; i > 1; --i) {
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(EHSel->getArgOperand(i))){
- unsigned FilterLength = CI->getZExtValue();
- unsigned FirstCatch = i + FilterLength + !FilterLength;
- assert(FirstCatch <= N && "Invalid filter length");
-
- if (FirstCatch < N)
- for (unsigned j = FirstCatch; j < N; ++j) {
- Value *Val = EHSel->getArgOperand(j);
- if (!Val->hasName() || Val->getName() != "llvm.eh.catch.all.value") {
- LPI->addClause(EHSel->getArgOperand(j));
- } else {
- GlobalVariable *GV = cast<GlobalVariable>(Val);
- LPI->addClause(GV->getInitializer());
- }
- }
-
- if (!FilterLength) {
- // Cleanup.
- LPI->setCleanup(true);
- } else {
- // Filter.
- SmallVector<Constant *, 4> TyInfo;
- TyInfo.reserve(FilterLength - 1);
- for (unsigned j = i + 1; j < FirstCatch; ++j)
- TyInfo.push_back(cast<Constant>(EHSel->getArgOperand(j)));
- ArrayType *AType =
- ArrayType::get(!TyInfo.empty() ? TyInfo[0]->getType() :
- PointerType::getUnqual(Type::getInt8Ty(Context)),
- TyInfo.size());
- LPI->addClause(ConstantArray::get(AType, TyInfo));
- }
-
- N = i;
- }
- }
-
- if (N > 2)
- for (unsigned j = 2; j < N; ++j) {
- Value *Val = EHSel->getArgOperand(j);
- if (!Val->hasName() || Val->getName() != "llvm.eh.catch.all.value") {
- LPI->addClause(EHSel->getArgOperand(j));
- } else {
- GlobalVariable *GV = cast<GlobalVariable>(Val);
- LPI->addClause(GV->getInitializer());
- }
- }
-}
-
-/// This function upgrades the old pre-3.0 exception handling system to the new
-/// one. N.B. This will be removed in 3.1.
-void llvm::UpgradeExceptionHandling(Module *M) {
- Function *EHException = M->getFunction("llvm.eh.exception");
- Function *EHSelector = M->getFunction("llvm.eh.selector");
- if (!EHException || !EHSelector)
- return;
-
- LLVMContext &Context = M->getContext();
- Type *ExnTy = PointerType::getUnqual(Type::getInt8Ty(Context));
- Type *SelTy = Type::getInt32Ty(Context);
- Type *LPadSlotTy = StructType::get(ExnTy, SelTy, NULL);
-
- // This map links the invoke instruction with the eh.exception and eh.selector
- // calls associated with it.
- DenseMap<InvokeInst*, std::pair<Value*, Value*> > InvokeToIntrinsicsMap;
- for (Module::iterator
- I = M->begin(), E = M->end(); I != E; ++I) {
- Function &F = *I;
-
- for (Function::iterator
- II = F.begin(), IE = F.end(); II != IE; ++II) {
- BasicBlock *BB = &*II;
- InvokeInst *Inst = dyn_cast<InvokeInst>(BB->getTerminator());
- if (!Inst) continue;
- BasicBlock *UnwindDest = Inst->getUnwindDest();
- if (UnwindDest->isLandingPad()) continue; // Already converted.
-
- SmallPtrSet<BasicBlock*, 8> Visited;
- CallInst *Exn = 0;
- CallInst *Sel = 0;
- FindExnAndSelIntrinsics(UnwindDest, Exn, Sel, Visited);
- assert(Exn && Sel && "Cannot find eh.exception and eh.selector calls!");
- InvokeToIntrinsicsMap[Inst] = std::make_pair(Exn, Sel);
- }
- }
-
- // This map stores the slots where the exception object and selector value are
- // stored within a function.
- DenseMap<Function*, std::pair<Value*, Value*> > FnToLPadSlotMap;
- SmallPtrSet<Instruction*, 32> DeadInsts;
- for (DenseMap<InvokeInst*, std::pair<Value*, Value*> >::iterator
- I = InvokeToIntrinsicsMap.begin(), E = InvokeToIntrinsicsMap.end();
- I != E; ++I) {
- InvokeInst *Invoke = I->first;
- BasicBlock *UnwindDest = Invoke->getUnwindDest();
- Function *F = UnwindDest->getParent();
- std::pair<Value*, Value*> EHIntrinsics = I->second;
- CallInst *Exn = cast<CallInst>(EHIntrinsics.first);
- CallInst *Sel = cast<CallInst>(EHIntrinsics.second);
-
- // Store the exception object and selector value in the entry block.
- Value *ExnSlot = 0;
- Value *SelSlot = 0;
- if (!FnToLPadSlotMap[F].first) {
- BasicBlock *Entry = &F->front();
- ExnSlot = new AllocaInst(ExnTy, "exn", Entry->getTerminator());
- SelSlot = new AllocaInst(SelTy, "sel", Entry->getTerminator());
- FnToLPadSlotMap[F] = std::make_pair(ExnSlot, SelSlot);
- } else {
- ExnSlot = FnToLPadSlotMap[F].first;
- SelSlot = FnToLPadSlotMap[F].second;
- }
-
- if (!UnwindDest->getSinglePredecessor()) {
- // The unwind destination doesn't have a single predecessor. Create an
- // unwind destination which has only one predecessor.
- BasicBlock *NewBB = BasicBlock::Create(Context, "new.lpad",
- UnwindDest->getParent());
- BranchInst::Create(UnwindDest, NewBB);
- Invoke->setUnwindDest(NewBB);
-
- // Fix up any PHIs in the original unwind destination block.
- for (BasicBlock::iterator
- II = UnwindDest->begin(); isa<PHINode>(II); ++II) {
- PHINode *PN = cast<PHINode>(II);
- int Idx = PN->getBasicBlockIndex(Invoke->getParent());
- if (Idx == -1) continue;
- PN->setIncomingBlock(Idx, NewBB);
- }
-
- UnwindDest = NewBB;
- }
-
- IRBuilder<> Builder(Context);
- Builder.SetInsertPoint(UnwindDest, UnwindDest->getFirstInsertionPt());
-
- Value *PersFn = Sel->getArgOperand(1);
- LandingPadInst *LPI = Builder.CreateLandingPad(LPadSlotTy, PersFn, 0);
- Value *LPExn = Builder.CreateExtractValue(LPI, 0);
- Value *LPSel = Builder.CreateExtractValue(LPI, 1);
- Builder.CreateStore(LPExn, ExnSlot);
- Builder.CreateStore(LPSel, SelSlot);
-
- TransferClausesToLandingPadInst(LPI, Sel);
-
- DeadInsts.insert(Exn);
- DeadInsts.insert(Sel);
- }
-
- // Replace the old intrinsic calls with the values from the landingpad
- // instruction(s). These values were stored in allocas for us to use here.
- for (DenseMap<InvokeInst*, std::pair<Value*, Value*> >::iterator
- I = InvokeToIntrinsicsMap.begin(), E = InvokeToIntrinsicsMap.end();
- I != E; ++I) {
- std::pair<Value*, Value*> EHIntrinsics = I->second;
- CallInst *Exn = cast<CallInst>(EHIntrinsics.first);
- CallInst *Sel = cast<CallInst>(EHIntrinsics.second);
- BasicBlock *Parent = Exn->getParent();
-
- std::pair<Value*,Value*> ExnSelSlots = FnToLPadSlotMap[Parent->getParent()];
-
- IRBuilder<> Builder(Context);
- Builder.SetInsertPoint(Parent, Exn);
- LoadInst *LPExn = Builder.CreateLoad(ExnSelSlots.first, "exn.load");
- LoadInst *LPSel = Builder.CreateLoad(ExnSelSlots.second, "sel.load");
-
- Exn->replaceAllUsesWith(LPExn);
- Sel->replaceAllUsesWith(LPSel);
- }
-
- // Remove the dead instructions.
- for (SmallPtrSet<Instruction*, 32>::iterator
- I = DeadInsts.begin(), E = DeadInsts.end(); I != E; ++I) {
- Instruction *Inst = *I;
- Inst->eraseFromParent();
- }
-
- // Replace calls to "llvm.eh.resume" with the 'resume' instruction. Load the
- // exception and selector values from the stored place.
- Function *EHResume = M->getFunction("llvm.eh.resume");
- if (!EHResume) return;
-
- while (!EHResume->use_empty()) {
- CallInst *Resume = cast<CallInst>(EHResume->use_back());
- BasicBlock *BB = Resume->getParent();
-
- IRBuilder<> Builder(Context);
- Builder.SetInsertPoint(BB, Resume);
-
- Value *LPadVal =
- Builder.CreateInsertValue(UndefValue::get(LPadSlotTy),
- Resume->getArgOperand(0), 0, "lpad.val");
- LPadVal = Builder.CreateInsertValue(LPadVal, Resume->getArgOperand(1),
- 1, "lpad.val");
- Builder.CreateResume(LPadVal);
-
- // Remove all instructions after the 'resume.'
- BasicBlock::iterator I = Resume;
- while (I != BB->end()) {
- Instruction *Inst = &*I++;
- Inst->eraseFromParent();
- }
- }
-}
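Before leaving AutoUpgrade.cpp: a plain C++ sketch of the semantics behind the ctlz/cttz upgrade added above. The new i1 operand says whether a zero input produces an undefined result; the upgrade passes false, so the old defined-on-zero behaviour is kept. The function below is illustrative only, not the intrinsic.

#include <cstdint>

static unsigned ctlz32(uint32_t X, bool IsZeroUndef) {
  if (X == 0) {
    // With IsZeroUndef the result would be unspecified; the auto-upgrade
    // passes false, so zero keeps returning the bit width as before.
    return 32;
  }
  unsigned N = 0;
  while ((X & 0x80000000u) == 0) {
    X <<= 1;
    ++N;
  }
  return N;
}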
diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt
index 0404297..99eeba1 100644
--- a/lib/VMCore/CMakeLists.txt
+++ b/lib/VMCore/CMakeLists.txt
@@ -37,5 +37,3 @@ add_llvm_library(LLVMCore
ValueTypes.cpp
Verifier.cpp
)
-
-add_llvm_library_dependencies(LLVMCore LLVMSupport)
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
index 30bae71..d1a9e7a 100644
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@@ -2209,7 +2209,7 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
I != E; ++I)
LastTy = *I;
- if ((LastTy && LastTy->isArrayTy()) || Idx0->isNullValue()) {
+ if ((LastTy && isa<SequentialType>(LastTy)) || Idx0->isNullValue()) {
SmallVector<Value*, 16> NewIndices;
NewIndices.reserve(Idxs.size() + CE->getNumOperands());
for (unsigned i = 1, e = CE->getNumOperands()-1; i != e; ++i)
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index cd94da1..a148912 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -1398,14 +1398,22 @@ Constant *ConstantExpr::getFPToSI(Constant *C, Type *Ty) {
}
Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy) {
- assert(C->getType()->isPointerTy() && "PtrToInt source must be pointer");
- assert(DstTy->isIntegerTy() && "PtrToInt destination must be integral");
+ assert(C->getType()->getScalarType()->isPointerTy() &&
+ "PtrToInt source must be pointer or pointer vector");
+ assert(DstTy->getScalarType()->isIntegerTy() &&
+ "PtrToInt destination must be integer or integer vector");
+ assert(C->getType()->getNumElements() == DstTy->getNumElements() &&
+ "Invalid cast between a different number of vector elements");
return getFoldedCast(Instruction::PtrToInt, C, DstTy);
}
Constant *ConstantExpr::getIntToPtr(Constant *C, Type *DstTy) {
- assert(C->getType()->isIntegerTy() && "IntToPtr source must be integral");
- assert(DstTy->isPointerTy() && "IntToPtr destination must be a pointer");
+ assert(C->getType()->getScalarType()->isIntegerTy() &&
+ "IntToPtr source must be integer or integer vector");
+ assert(DstTy->getScalarType()->isPointerTy() &&
+ "IntToPtr destination must be a pointer or pointer vector");
+ assert(C->getType()->getNumElements() == DstTy->getNumElements() &&
+ "Invalid cast between a different number of vector elements");
return getFoldedCast(Instruction::IntToPtr, C, DstTy);
}
diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp
index 73191c1..8c8fbf9 100644
--- a/lib/VMCore/Instruction.cpp
+++ b/lib/VMCore/Instruction.cpp
@@ -391,59 +391,6 @@ bool Instruction::isCommutative(unsigned op) {
}
}
-bool Instruction::isSafeToSpeculativelyExecute() const {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (Constant *C = dyn_cast<Constant>(getOperand(i)))
- if (C->canTrap())
- return false;
-
- switch (getOpcode()) {
- default:
- return true;
- case UDiv:
- case URem: {
- // x / y is undefined if y == 0, but calcuations like x / 3 are safe.
- ConstantInt *Op = dyn_cast<ConstantInt>(getOperand(1));
- return Op && !Op->isNullValue();
- }
- case SDiv:
- case SRem: {
- // x / y is undefined if y == 0, and might be undefined if y == -1,
- // but calcuations like x / 3 are safe.
- ConstantInt *Op = dyn_cast<ConstantInt>(getOperand(1));
- return Op && !Op->isNullValue() && !Op->isAllOnesValue();
- }
- case Load: {
- const LoadInst *LI = cast<LoadInst>(this);
- if (!LI->isUnordered())
- return false;
- return LI->getPointerOperand()->isDereferenceablePointer();
- }
- case Call:
- return false; // The called function could have undefined behavior or
- // side-effects.
- // FIXME: We should special-case some intrinsics (bswap,
- // overflow-checking arithmetic, etc.)
- case VAArg:
- case Alloca:
- case Invoke:
- case PHI:
- case Store:
- case Ret:
- case Br:
- case IndirectBr:
- case Switch:
- case Unwind:
- case Unreachable:
- case Fence:
- case LandingPad:
- case AtomicRMW:
- case AtomicCmpXchg:
- case Resume:
- return false; // Misc instructions which have effects
- }
-}
-
Instruction *Instruction::clone() const {
Instruction *New = clone_impl();
New->SubclassOptionalData = SubclassOptionalData;
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index c8dcdc8..4784f0c 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -1359,6 +1359,15 @@ GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI)
///
template <typename IndexTy>
static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef<IndexTy> IdxList) {
+ if (Ptr->isVectorTy()) {
+ assert(IdxList.size() == 1 &&
+ "GEP with vector pointers must have a single index");
+ PointerType *PTy = dyn_cast<PointerType>(
+ cast<VectorType>(Ptr)->getElementType());
+ assert(PTy && "GEP with invalid vector pointer found");
+ return PTy->getElementType();
+ }
+
PointerType *PTy = dyn_cast<PointerType>(Ptr);
if (!PTy) return 0; // Type isn't a pointer type!
Type *Agg = PTy->getElementType();
@@ -1366,7 +1375,7 @@ static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef<IndexTy> IdxList) {
// Handle the special case of the empty set index set, which is always valid.
if (IdxList.empty())
return Agg;
-
+
// If there is at least one index, the top level type must be sized, otherwise
// it cannot be 'stepped over'.
if (!Agg->isSized())
@@ -1396,6 +1405,19 @@ Type *GetElementPtrInst::getIndexedType(Type *Ptr, ArrayRef<uint64_t> IdxList) {
return getIndexedTypeInternal(Ptr, IdxList);
}
+unsigned GetElementPtrInst::getAddressSpace(Value *Ptr) {
+ Type *Ty = Ptr->getType();
+
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ Ty = VTy->getElementType();
+
+ if (PointerType *PTy = dyn_cast<PointerType>(Ty))
+ return PTy->getAddressSpace();
+
+ assert(false && "Invalid GEP pointer type");
+ return 0;
+}
+
/// hasAllZeroIndices - Return true if all of the indices of this GEP are
/// zeros. If so, the result pointer and the first operand have the same
/// value, just potentially different types.
@@ -2005,6 +2027,8 @@ bool BinaryOperator::isExact() const {
// CastInst Class
//===----------------------------------------------------------------------===//
+void CastInst::anchor() {}
+
// Just determine if this cast only deals with integral->integral conversion.
bool CastInst::isIntegerCast() const {
switch (getOpcode()) {
@@ -2652,9 +2676,15 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) {
return SrcTy->isFPOrFPVectorTy() && DstTy->isIntOrIntVectorTy() &&
SrcLength == DstLength;
case Instruction::PtrToInt:
- return SrcTy->isPointerTy() && DstTy->isIntegerTy();
+ if (SrcTy->getNumElements() != DstTy->getNumElements())
+ return false;
+ return SrcTy->getScalarType()->isPointerTy() &&
+ DstTy->getScalarType()->isIntegerTy();
case Instruction::IntToPtr:
- return SrcTy->isIntegerTy() && DstTy->isPointerTy();
+ if (SrcTy->getNumElements() != DstTy->getNumElements())
+ return false;
+ return SrcTy->getScalarType()->isIntegerTy() &&
+ DstTy->getScalarType()->isPointerTy();
case Instruction::BitCast:
// BitCast implies a no-op cast of type only. No bits change.
// However, you can't cast pointers to anything but pointers.
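
The castIsValid change can be exercised without building any instructions: a vector inttoptr with matching element counts is now accepted, while mismatched widths fail the new getNumElements() comparison. A small sketch under that assumption; canIntToPtrVector is a hypothetical name:

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/InstrTypes.h"
    #include "llvm/LLVMContext.h"
    using namespace llvm;

    bool canIntToPtrVector(LLVMContext &Ctx, unsigned Width) {
      VectorType *SrcTy = VectorType::get(Type::getInt64Ty(Ctx), Width);
      VectorType *DstTy = VectorType::get(Type::getInt8PtrTy(Ctx), Width);
      // UndefValue stands in for an arbitrary operand of the source type.
      Value *Src = UndefValue::get(SrcTy);
      // Returns true here; giving DstTy a different number of elements would
      // make castIsValid reject the cast instead.
      return CastInst::castIsValid(Instruction::IntToPtr, Src, DstTy);
    }
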
diff --git a/lib/VMCore/LLVMBuild.txt b/lib/VMCore/LLVMBuild.txt
index 45f528e..bca8b2c 100644
--- a/lib/VMCore/LLVMBuild.txt
+++ b/lib/VMCore/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = Core
parent = Libraries
required_libraries = Support
-
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index ace4dc2..8debd7c 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -425,12 +425,12 @@ StringRef NamedMDNode::getName() const {
// Instruction Metadata method implementations.
//
-void Instruction::setMetadata(const char *Kind, MDNode *Node) {
+void Instruction::setMetadata(StringRef Kind, MDNode *Node) {
if (Node == 0 && !hasMetadata()) return;
setMetadata(getContext().getMDKindID(Kind), Node);
}
-MDNode *Instruction::getMetadataImpl(const char *Kind) const {
+MDNode *Instruction::getMetadataImpl(StringRef Kind) const {
return getMetadataImpl(getContext().getMDKindID(Kind));
}
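
Switching the kind parameter from const char * to StringRef lets callers pass metadata kind names that are not null-terminated string literals. A tiny usage sketch, assuming the matching declaration change in include/llvm/Instruction.h from the same patch; the function and the ".annotation" suffix are hypothetical:

    #include "llvm/Instruction.h"
    #include "llvm/Metadata.h"
    #include <string>
    using namespace llvm;

    void tagInstruction(Instruction *I, MDNode *N, const std::string &Prefix) {
      std::string Kind = Prefix + ".annotation"; // kind name built at runtime
      I->setMetadata(Kind, N);                   // std::string converts to StringRef
    }
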
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index 10184bc..469defd 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -46,6 +46,14 @@ Type *Type::getScalarType() {
return this;
}
+/// getNumElements - If this is a vector type, return the number of elements,
+/// otherwise return zero.
+unsigned Type::getNumElements() {
+ if (VectorType *VTy = dyn_cast<VectorType>(this))
+ return VTy->getNumElements();
+ return 0;
+}
+
/// isIntegerTy - Return true if this is an IntegerType of the specified width.
bool Type::isIntegerTy(unsigned Bitwidth) const {
return isIntegerTy() && cast<IntegerType>(this)->getBitWidth() == Bitwidth;
@@ -664,6 +672,8 @@ VectorType *VectorType::get(Type *elementType, unsigned NumElements) {
}
bool VectorType::isValidElementType(Type *ElemTy) {
+ if (PointerType *PTy = dyn_cast<PointerType>(ElemTy))
+ ElemTy = PTy->getElementType();
return ElemTy->isIntegerTy() || ElemTy->isFloatingPointTy();
}
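
Because the new Type::getNumElements() returns 0 for any non-vector type, the element-count comparisons added in Constants.cpp and Instructions.cpp need no explicit isVectorTy() guards: two scalars compare 0 == 0, and a scalar paired with a vector compares unequal. A minimal sketch, assuming the matching declaration in include/llvm/Type.h from the same change; sameVectorWidth is a hypothetical helper:

    #include "llvm/Type.h"
    using namespace llvm;

    static bool sameVectorWidth(Type *A, Type *B) {
      // 0 == 0 for two scalars, N == N for two vectors of equal width;
      // anything else (scalar vs. vector, differing widths) compares unequal.
      return A->getNumElements() == B->getNumElements();
    }
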
diff --git a/lib/VMCore/User.cpp b/lib/VMCore/User.cpp
index f01fa34..5f35ce4 100644
--- a/lib/VMCore/User.cpp
+++ b/lib/VMCore/User.cpp
@@ -17,6 +17,8 @@ namespace llvm {
// User Class
//===----------------------------------------------------------------------===//
+void User::anchor() {}
+
// replaceUsesOfWith - Replaces all references to the "From" definition with
// references to the "To" definition.
//
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 291df91..a5f1918 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -108,6 +108,19 @@ bool Value::hasNUsesOrMore(unsigned N) const {
/// isUsedInBasicBlock - Return true if this value is used in the specified
/// basic block.
bool Value::isUsedInBasicBlock(const BasicBlock *BB) const {
+ // Start by scanning over the instructions looking for a use before we start
+ // the expensive use iteration.
+ unsigned MaxBlockSize = 3;
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (std::find(I->op_begin(), I->op_end(), this) != I->op_end())
+ return true;
+ if (--MaxBlockSize == 0) // If the block is larger, fall back to use_iterator
+ break;
+ }
+
+ if (MaxBlockSize != 0) // We scanned the entire block and found no use.
+ return false;
+
for (const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) {
const Instruction *User = dyn_cast<Instruction>(*I);
if (User && User->getParent() == BB)
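
Either list involved here can be long, the block being queried or the value's use list, so scanning a handful of instructions first keeps the common short-block case cheap. A small caller-side sketch (firstBlockUsing is a hypothetical helper) of the kind of query that benefits:

    #include "llvm/BasicBlock.h"
    #include "llvm/Value.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    // Returns the first candidate block that uses V, or 0 if none does. With
    // the fast path above, each query on a short block touches only a few
    // operand lists instead of walking V's entire use list.
    static const BasicBlock *
    firstBlockUsing(const Value *V, const SmallVectorImpl<const BasicBlock *> &BBs) {
      for (unsigned i = 0, e = BBs.size(); i != e; ++i)
        if (V->isUsedInBasicBlock(BBs[i]))
          return BBs[i];
      return 0;
    }
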
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 9564b7d..003de44 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -1035,8 +1035,19 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) {
Type *SrcTy = I.getOperand(0)->getType();
Type *DestTy = I.getType();
- Assert1(SrcTy->isPointerTy(), "PtrToInt source must be pointer", &I);
- Assert1(DestTy->isIntegerTy(), "PtrToInt result must be integral", &I);
+ Assert1(SrcTy->getScalarType()->isPointerTy(),
+ "PtrToInt source must be pointer", &I);
+ Assert1(DestTy->getScalarType()->isIntegerTy(),
+ "PtrToInt result must be integral", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "PtrToInt type mismatch", &I);
+
+ if (SrcTy->isVectorTy()) {
+ VectorType *VSrc = dyn_cast<VectorType>(SrcTy);
+ VectorType *VDest = dyn_cast<VectorType>(DestTy);
+ Assert1(VSrc->getNumElements() == VDest->getNumElements(),
+ "PtrToInt Vector width mismatch", &I);
+ }
visitInstruction(I);
}
@@ -1046,9 +1057,18 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) {
Type *SrcTy = I.getOperand(0)->getType();
Type *DestTy = I.getType();
- Assert1(SrcTy->isIntegerTy(), "IntToPtr source must be an integral", &I);
- Assert1(DestTy->isPointerTy(), "IntToPtr result must be a pointer",&I);
-
+ Assert1(SrcTy->getScalarType()->isIntegerTy(),
+ "IntToPtr source must be an integral", &I);
+ Assert1(DestTy->getScalarType()->isPointerTy(),
+ "IntToPtr result must be a pointer", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "IntToPtr type mismatch", &I);
+ if (SrcTy->isVectorTy()) {
+ VectorType *VSrc = dyn_cast<VectorType>(SrcTy);
+ VectorType *VDest = dyn_cast<VectorType>(DestTy);
+ Assert1(VSrc->getNumElements() == VDest->getNumElements(),
+ "IntToPtr Vector width mismatch", &I);
+ }
visitInstruction(I);
}
@@ -1245,7 +1265,7 @@ void Verifier::visitICmpInst(ICmpInst &IC) {
Assert1(Op0Ty == Op1Ty,
"Both operands to ICmp instruction are not of the same type!", &IC);
// Check that the operands are the right type
- Assert1(Op0Ty->isIntOrIntVectorTy() || Op0Ty->isPointerTy(),
+ Assert1(Op0Ty->isIntOrIntVectorTy() || Op0Ty->getScalarType()->isPointerTy(),
"Invalid operand types for ICmp instruction", &IC);
// Check that the predicate is valid.
Assert1(IC.getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE &&
@@ -1295,17 +1315,43 @@ void Verifier::visitShuffleVectorInst(ShuffleVectorInst &SV) {
}
void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
- Assert1(cast<PointerType>(GEP.getOperand(0)->getType())
- ->getElementType()->isSized(),
+ Type *TargetTy = GEP.getPointerOperandType();
+ if (VectorType *VTy = dyn_cast<VectorType>(TargetTy))
+ TargetTy = VTy->getElementType();
+
+ Assert1(dyn_cast<PointerType>(TargetTy),
+ "GEP base pointer is not a pointer or a vector of pointers", &GEP);
+ Assert1(cast<PointerType>(TargetTy)->getElementType()->isSized(),
"GEP into unsized type!", &GEP);
-
+
SmallVector<Value*, 16> Idxs(GEP.idx_begin(), GEP.idx_end());
Type *ElTy =
- GetElementPtrInst::getIndexedType(GEP.getOperand(0)->getType(), Idxs);
+ GetElementPtrInst::getIndexedType(GEP.getPointerOperandType(), Idxs);
Assert1(ElTy, "Invalid indices for GEP pointer type!", &GEP);
- Assert2(GEP.getType()->isPointerTy() &&
- cast<PointerType>(GEP.getType())->getElementType() == ElTy,
- "GEP is not of right type for indices!", &GEP, ElTy);
+
+ if (GEP.getPointerOperandType()->isPointerTy()) {
+ // Validate GEPs with scalar indices.
+ Assert2(GEP.getType()->isPointerTy() &&
+ cast<PointerType>(GEP.getType())->getElementType() == ElTy,
+ "GEP is not of right type for indices!", &GEP, ElTy);
+ } else {
+ // Validate GEPs with a vector index.
+ Assert1(Idxs.size() == 1, "Invalid number of indices!", &GEP);
+ Value *Index = Idxs[0];
+ Type *IndexTy = Index->getType();
+ Assert1(IndexTy->isVectorTy(),
+ "Vector GEP must have vector indices!", &GEP);
+ Assert1(GEP.getType()->isVectorTy(),
+ "Vector GEP must return a vector value", &GEP);
+ Type *ElemPtr = cast<VectorType>(GEP.getType())->getElementType();
+ Assert1(ElemPtr->isPointerTy(),
+ "Vector GEP pointer operand is not a pointer!", &GEP);
+ unsigned IndexWidth = cast<VectorType>(IndexTy)->getNumElements();
+ unsigned GepWidth = cast<VectorType>(GEP.getType())->getNumElements();
+ Assert1(IndexWidth == GepWidth, "Invalid GEP index vector width", &GEP);
+ Assert1(ElTy == cast<PointerType>(ElemPtr)->getElementType(),
+ "Vector GEP type does not match pointer type!", &GEP);
+ }
visitInstruction(GEP);
}
@@ -1642,6 +1688,12 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
switch (ID) {
default:
break;
+ case Intrinsic::ctlz: // llvm.ctlz
+ case Intrinsic::cttz: // llvm.cttz
+ Assert1(isa<ConstantInt>(CI.getArgOperand(1)),
+ "is_zero_undef argument of bit counting intrinsics must be a "
+ "constant int", &CI);
+ break;
case Intrinsic::dbg_declare: { // llvm.dbg.declare
Assert1(CI.getArgOperand(0) && isa<MDNode>(CI.getArgOperand(0)),
"invalid llvm.dbg.declare intrinsic call 1", &CI);